
/*
Replication dofile for the final report for the project:
* "Do community water sources provide safe drinking water? Evidence from a randomized experiment in rural Bangladesh"

This version: Feb-2021

Notes:
- In order to run this dofile, it is necessary to save the adofiles reg_centered and xtivreg2_centered to your ado directory, stored in the subdirectory dofiles/ado.  
- You may also need to install additional STATA packages.  
- You need to replace the home directory with the location of the Replication Folder before running the code.  
*/

clear all
set more off
set maxvar 30000
set matsize 10000
eststo clear

set scheme s1mono

*cd .. (replace your own file path here and uncomment to navigation to Replication Folder) 

global data "data"
global dofiles "dofiles"
global path_tables "Tables"
global path_graphs "Graphs"

*ssc install reghdfe
*ssc install ftools
*ssc install texdoc

global repetitions_rbi = 500


* define program to calculate pvalues
capture program drop pvalue
program pvalue
global estimate = r(estimate)
global se = r(se)
global t = abs($estimate/$se)
global pvalue = 2* (1-normal($t))
end

***FIGURES: main paper

///////////////////
//// table 1 ////
///////////////////

use "$data/temp/temp_analysis.dta", clear

global vars2_a hh_arsenic_field_res hh_arsenic_any hh_arsenic_50plus hh_enterbac_fc_exp
global vars2_b arsenic_test_result_ws1 arsenic_any_ws1 arsenic_50plus_ws1 enterbac_fc_exp_ws1 
global vars2_c source_test_stored treat_d_ws1 collect_water_mins collect_per_day_qty 

global vars2 $vars2_a $vars2_b $vars2_c


label var hh_enterbac_fc_exp "Fecal contamination"
label var hh_arsenic_field_res "Arsenic contamination"
label var hh_arsenic_any "Arsenic contamination (WHO threshold)"
label var hh_arsenic_50plus "Arsenic contamination (Bangladeshi threshold)"
label var arsenic_any_ws1 "Arsenic contamination (WHO threshold)"
label var arsenic_50plus_ws1 "Arsenic contamination (Bangladeshi threshold)"
label var arsenic_test_result_ws1 "Arsenic contamination"
label var enterbac_fc_exp_ws1 "Fecal contamination"
label var source_test_stored "Household observed to store drinking water"
label var treat_d_ws1 "Household reports treating water before drinking"
label var collect_water_mins "Time needed to collect water (minutes)"


foreach var of global vars2 { 
	global lab_`var' : var label `var' 
	reg `var' cclpg_sample mics_sample [aweight=pweight_b] , nocons cluster(village_code) 
	foreach c of newlist cclpg mics {
		dis ${`var'_c_`c'}
		if _se[`c'_sample] > 1 {
			global `var'_c_`c'= trim("`: display %9.2g _b[`c'_sample]'")
			global `var'_sd_`c' = trim("`: display %9.2g _se[`c'_sample]'")
		}
		if _se[`c'_sample] <= 1 & _se[`c'_sample] >0.1 {
			global `var'_c_`c'= trim("`: display %04.2f _b[`c'_sample]'")
			global `var'_sd_`c' = trim("`: display %9.2f _se[`c'_sample]'")
		}
		if _se[`c'_sample] <= 0.1 {
			global `var'_c_`c'= trim("`: display %04.2f _b[`c'_sample]'")
			global `var'_sd_`c' = trim("`: display %9.3f _se[`c'_sample]'")
		}
		if "`var'"=="collect_water_mins" {
			global `var'_c_`c'= trim("`: display %9.2g _b[`c'_sample]'")
			global `var'_sd_`c' = trim("`: display %9.2f _se[`c'_sample]'")
		}
		dis ${`var'_sd_`c'}
	}
}
* export table 
texdoc do "$dofiles/texdoc/Table1.do"
**creates Table1.tex


///////////////////
//// table 2 //////
///////////////////

use "$data/temp/temp_ws.dta", clear

label var fu_project_tw "Difference"

foreach v of varlist fu_enterbac_fc_ws fu_arsenic_any_ws fu_arsenic_50plus_ws {
	eststo m_`v' : xi: reg `v' fu_project_tw if num_project_tws!=0 , cluster(village) absorb(village)
	sum `v' if fu_project_tw==0 & num_project_tws!=0
	estadd local mean_non_cclpg = trim("`: display %9.2f r(mean)'")
	sum `v' if fu_project_tw==1 & num_project_tws!=0
	estadd local mean_cclpg = trim("`: display %9.2f r(mean)'")
}
#delimit ;
esttab m_* using "$path_tables/Table2.tex", replace 
	cell(b(star fmt(2)) se(par fmt(2)) )
	label nogap number starlevels(* 0.1 ** 0.05 *** 0.01) 
	keep(fu_project_tw) 
	nomtitle 
	mgroups("\specialcell{\;\;\;\;\;\; Fecal \\ contamination }"  
		"\specialcell{\;\;\;\; Arsenic \\ contamination \\ \;\;\;\; (WHO \\ \;\;\;\; threshold) }" 
		"\specialcell{\;\;\;\; Arsenic \\ contamination \\ \;\;\;\;\; (Bangladeshi \\ \;\;\;\;  threshold) }" , 
		pattern(1 1 1 ) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) 
	collabels(none)
	stats( mean_cclpg mean_non_cclpg N, layout(@ @ )  
	labels( "Mean (project TW)" "Mean (other TWs)" "N") 
	fmt( %9.0g %9.0g %9.0f ))
;
#delimit cr
eststo clear


///////////////////
//// table 3  /////
///////////////////

use "$data/temp/temp_analysis.dta", clear

* select final sample
keep if complete_panel == 1


* table 2: arsenic and bacteria, WS test
eststo clear

global weight pweight_normal_final

rename ws_arsenic_any_wmean ws_as_any_wm
rename ws_arsenic_50plus_wmean ws_as_50_wm
rename ws_arsenic_res_wmean ws_as_res_wm
rename ws_enterbac_fc_exp_wmean ws_bac_any_wm
rename *ws_arsenic_any_wmean *ws_as_any_wm
rename *ws_arsenic_50plus_wmean *ws_as_50_wm
rename *ws_arsenic_res_wmean *ws_as_res_wm
rename *ws_enterbac_fc_exp_wmean *ws_bac_any_wm

foreach v of varlist ws_as_any_wm ws_as_50_wm ws_as_res_wm ws_bac_any_wm{
	
	* RBI
	*This code generates the p-values for inference over differences in means for each characteristic separately, by using reshuffled treatment status from resimulated lotteries
	*This matrix stores the differences in means between all treated groups and the control group under all of the reshuffled treatment status
	mat dif_`v'_t_rbi = J($repetitions_rbi,1,.)
	preserve
	keep dif_`v' control* treated* $weight lu_* village_code
	forvalues i = 1(1)$repetitions_rbi {
		quiet reg_centered dif_`v' treated_trvsco_`i' [aweight = $weight], cluster(village_code) control(lu_*) 
		mat dif_`v'_t_rbi[`i',1] = _b[treated_trvsco_`i']
	}
	restore
	svmat dif_`v'_t_rbi
	*This regression recovers the true difference in means between all treated groups and the control group under the realized treatment status
	quiet reg_centered dif_`v' treated [aweight = $weight], cluster(village_code) control(lu_*)
	*The code below recovers the p-values implied by the RBI
	capture drop temp
	gen temp = (abs(_b[treated])<= abs(dif_`v'_t_rbi1))
	quiet summarize temp if dif_`v'_t_rbi1 != .
	global pvalue_rbi = r(mean)
	drop temp


	* non RBI
	eststo m_dif_`v' : reg_centered dif_`v' treated [aweight=pweight_normal_final] , cluster(village_code) control(lu_*) level(90)
	
	quiet summarize `v' [aweight=pweight_normal_final] if e(sample)==1
	global meanbase=r(mean)
	estadd scalar meanbase=$meanbase
	
* calculate and save non-RBI pvalues
	quiet test treated 
	global pvalue = r(p)
	estadd scalar pvalue = $pvalue
	* save RBI pvalues is eststo
	estadd scalar pvalue_rbi = $pvalue_rbi

}
* export table
#delimit ;
esttab m_* using "$path_tables/Table3.tex", replace 
	cell(b(star fmt(3)) se(par fmt(3)) )
	label nogap number starlevels(* 0.1 ** 0.05 *** 0.01) 
	keep(treated _cons) 
	nomtitle 
	mgroups("\specialcell{\;\;\;\; Arsenic \\ contamination \\ \;\;\;\; (WHO \\ \;\; threshold)}" 
		"\specialcell{\;\;\;\;\;Arsenic \\ contamination \\ (Bangladeshi \\ \;\; threshold)}"
		"\specialcell{\;\;\;\; Arsenic \\ contamination \\ \;\;\;\;\; level}" "\specialcell{\;\;\;\;\;\; Fecal \\ contamination}", 
	pattern(1 1 1 1) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) 
	collabels(none) 
	stats(pvalue pvalue_rbi meanbase r2 N , layout(@ @ @ @) labels("p-value (analytical)" "p-value (RBI)" "Mean at baseline" "R$^2$" "N") fmt( %9.2f %9.2f %9.2f %9.2f %9.0f ))
;
#delimit cr
eststo clear



///////////////////
//// table 4 //////
///////////////////
use "$data/temp/temp_analysis.dta", clear

* select final sample
keep if complete_panel == 1 
eststo clear
global weight pweight_normal_final

rename ws_hh_distance_final_wmean ws_hh_dist_f_wmean
rename *ws_hh_distance_final_wmean *ws_hh_dist_f_wmean


foreach v of varlist ws_hh_dist_f_wmean ws_dist_min_wmean source_test_stored water_storage_d {

	* RBI
	*This code generates the p-values for inference over differences in means for each characteristic separately, by using reshuffled treatment status from resimulated lotteries
	*This matrix stores the differences in means between all treated groups and the control group under all of the reshuffled treatment status
	mat dif_`v'_t_rbi = J($repetitions_rbi,1,.)
	preserve
	keep dif_`v' control* treated* $weight lu_* village_code
	forvalues i = 1(1)$repetitions_rbi {
		quiet reg_centered dif_`v' treated_trvsco_`i' [aweight = $weight], cluster(village_code) control(lu_*)
		mat dif_`v'_t_rbi[`i',1] = _b[treated_trvsco_`i']
	}
	restore
	svmat dif_`v'_t_rbi
	*This regression recovers the true difference in means between all treated groups and the control group under the realized treatment status
	quiet reg_centered dif_`v' treated [aweight = $weight], cluster(village_code) control(lu_*)
	*The code below recovers the p-values implied by the RBI
	capture drop temp
	gen temp = (abs(_b[treated])<= abs(dif_`v'_t_rbi1))
	quiet summarize temp if dif_`v'_t_rbi1 != .
	global pvalue_rbi = r(mean)
	drop temp
	
	* non RBI
	eststo m_dif_`v' : reg_centered dif_`v' treated [aweight=pweight_normal_final] , cluster(village_code) control(lu_*) level(90)
	
	quiet summarize `v' if e(sample)==1 [aweight=pweight_normal_final]
	global meanbase=r(mean)
	estadd scalar meanbase=$meanbase
	
	* calculate and save non-RBI pvalues
	quiet test treated 
	global pvalue = r(p)
	estadd scalar pvalue = $pvalue
	* save RBI pvalues is eststo
	estadd scalar pvalue_rbi = $pvalue_rbi

}
* export table
#delimit ;
esttab m_* using "$path_tables/Table4.tex", replace 
	cell(b(star fmt(3)) se(par fmt(3)) )
	label nogap number starlevels(* 0.1 ** 0.05 *** 0.01) 
	keep(treated _cons) 
	nomtitle 
	mgroups("\specialcell{\;Distance \\ HH-WS (m)}" "\specialcell{\;\;\;Distance \\ HH-WS (min)}" "\specialcell{Observed \\ \; storage}"
		"\specialcell{Reported \\ \; storage}", 
		pattern(1 1 1 1) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) 
	collabels(none) 
	stats(pvalue pvalue_rbi meanbase r2 N , layout(@ @ @ @) labels("p-value (analytical)" "p-value (RBI)" "Mean at baseline" "R$^2$" "N") fmt( %9.2f %9.2f %9.2f %9.2f %9.0f ))
;
#delimit cr
eststo clear



///////////////////
//// table 5 //////
///////////////////
use "$data/temp/temp_analysis.dta", clear

* select final sample
keep if complete_panel == 1 // final panel
eststo clear
global weight pweight_normal_final

rename dif_water_storage_uncovered_dd dif_stor_uncovered_dd
rename fu_water_storage_uncovered_dd fu_stor_uncovered_dd
rename water_storage_uncovered_dd stor_uncovered_dd

rename dif_water_storage_floor_dd dif_water_stor_floor_dd
rename fu_water_storage_floor_dd fu_water_stor_floor_dd
rename water_storage_floor_dd water_stor_floor_dd

rename fu_source_test_uncovered_dd fu_sourcet_uncovered_dd

foreach v of varlist dif_stor_uncovered_dd dif_water_stor_floor_dd fu_sourcet_uncovered_dd fu_source_test_floor_dd fu_source_test_scooped_dd {
	
	* RBI
	*This code generates the p-values for inference over differences in means for each characteristic separately, by using reshuffled treatment status from resimulated lotteries
	*This matrix stores the differences in means between all treated groups and the control group under all of the reshuffled treatment status
	mat `v'_t_rbi = J($repetitions_rbi,1,.)
	preserve
	keep `v' control* treated* $weight lu_* village_code
	forvalues i = 1(1)$repetitions_rbi {
		quiet reg_centered `v' treated_trvsco_`i' [aweight = $weight], cluster(village_code) control(lu_*)
		mat `v'_t_rbi[`i',1] = _b[treated_trvsco_`i']
	}
	restore
	svmat `v'_t_rbi
	*This regression recovers the true difference in means between all treated groups and the control group under the realized treatment status
	quiet reg_centered `v' treated [aweight = $weight], cluster(village_code) control(lu_*)
	*The code below recovers the p-values implied by the RBI
	capture drop temp
	gen temp = (abs(_b[treated])<= abs(`v'_t_rbi1))
	quiet summarize temp if `v'_t_rbi1 != .
	global pvalue_rbi = r(mean)
	drop temp

	* non RBI 
	eststo m_`v' : reg_centered `v' treated [aweight=pweight_normal_final] , cluster(village_code) control(lu_*) level(90)
	if "`v'" != "dif_stor_uncovered_dd" & "`v'" != "dif_water_stor_floor_dd" {
		estadd local only_fu "$\checkmark$"
	}
	
	if "`v'" == "dif_stor_uncovered_dd" {
		quiet summarize stor_uncovered_dd if e(sample)==1 [aweight=pweight_normal_final]
		global meanbase=r(mean)
		estadd scalar meanbase=$meanbase
	}
	
	if "`v'" == "dif_water_stor_floor_dd" {
		quiet summarize water_stor_floor_dd if e(sample)==1 [aweight=pweight_normal_final]
		global meanbase=r(mean)
		estadd scalar meanbase=$meanbase
	}

	* calculate and save non-RBI pvalues
	quiet test treated 
	global pvalue = r(p)
	estadd scalar pvalue = $pvalue
	* save RBI pvalues
	estadd scalar pvalue_rbi = $pvalue_rbi
}
* export table
#delimit ;
esttab m_* using "$path_tables/Table5.tex", replace 
	cell(b(star fmt(3)) se(par fmt(3)) )
	label nogap number starlevels(* 0.1 ** 0.05 *** 0.01) 
	keep(treated _cons) 
	nomtitle 
	mgroups("\specialcell{\; Containers \\ are uncovered \\ (self-reported)}" "\specialcell{Containers are \\ \; on the floor \\ (self-reported)}"
		"\specialcell{\; Containers \\ are uncovered \\ \; (observed)}" "\specialcell{Containers are \\ \; on the floor \\ \; (observed)}"
		"\specialcell{\; Water is \\ \; scooped \\ (observed)}", 
		pattern(1 1 1 1 1 ) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) 
	collabels(none) 
	stats(only_fu pvalue pvalue_rbi meanbase r2 N , layout(@ @ @ @) labels("Only endline data" "p-value (analytical)" "p-value (RBI)" "Mean at baseline" "R$^2$" "N") fmt( %9.0g %9.2f %9.2f %9.2f %9.2f %9.0f ))
;
#delimit cr
eststo clear


///////////////////
//// table 6 //////
///////////////////
use "$data/temp/temp_analysis.dta", clear

* select final sample
keep if complete_panel == 1 // final panel
eststo clear
global weight pweight_normal_final

eststo clear

rename dif_hh_arsenic_field_test_result dif_hh_arsenic_field_res
rename fu_hh_arsenic_field_test_result fu_hh_arsenic_field_res

foreach v of varlist hh_arsenic_any  hh_arsenic_50plus hh_arsenic_field_res hh_enterbac_fc_exp {

	* RBI
	*This code generates the p-values for inference over differences in means for each characteristic separately, by using reshuffled treatment status from resimulated lotteries
	*This matrix stores the differences in means between all treated groups and the control group under all of the reshuffled treatment status
	mat dif_`v'_t_rbi = J($repetitions_rbi,1,.)
	preserve
	keep dif_`v' control* treated* $weight lu_* village_code
	forvalues i = 1(1)$repetitions_rbi {
		quiet reg_centered dif_`v' treated_trvsco_`i' [aweight = $weight], cluster(village_code) control(lu_*)
		mat dif_`v'_t_rbi[`i',1] = _b[treated_trvsco_`i']
	}
	restore
	svmat dif_`v'_t_rbi
	*This regression recovers the true difference in means between all treated groups and the control group under the realized treatment status
	quiet reg_centered dif_`v' treated [aweight = $weight], cluster(village_code) control(lu_*)
	*The code below recovers the p-values implied by the RBI
	capture drop temp
	gen temp = (abs(_b[treated])<= abs(dif_`v'_t_rbi1))
	quiet summarize temp if dif_`v'_t_rbi1 != .
	global pvalue_rbi = r(mean)
	drop temp
	
	* non RBI
	eststo m_dif_`v' : reg_centered dif_`v' treated [aweight=pweight_normal_final] , cluster(village_code) control(lu_*) level(90)
	
	quiet summarize `v' if e(sample)==1 [aweight=pweight_normal_final]
	global meanbase=r(mean)
	estadd scalar meanbase=$meanbase
	
	* calculate and save non-RBI pvalues
	quiet test treated 
	global pvalue = r(p)
	estadd scalar pvalue = $pvalue
	* save RBI pvalues is eststo
	estadd scalar pvalue_rbi = $pvalue_rbi
	
	
	
}
* export table
#delimit ;
esttab m_* using "$path_tables/Table6.tex", replace 
	cell(b(star fmt(3)) se(par fmt(3)) )
	label nogap number starlevels(* 0.1 ** 0.05 *** 0.01) 
	keep(treated _cons) 
	nomtitle 
	mgroups("\specialcell{\;\;\;\; Arsenic \\ contamination \\ \;\;\;\; (WHO \\ \;\; threshold)}" 
		"\specialcell{\;\;\;\;\;Arsenic \\ contamination \\ (Bangladeshi \\ \;\; threshold)}"
		"\specialcell{\;\;\;\; Arsenic \\ contamination \\ \;\;\;\;\; level}" "\specialcell{\;\;\;\;\;\; Fecal \\ contamination}", 
		pattern(1 1 1 1) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) 
	collabels(none) 
	stats(pvalue pvalue_rbi meanbase r2 N , layout(@ @ @ @) labels("p-value (analytical)" "p-value (RBI)" "Mean at baseline" "R$^2$" "N") fmt( %9.2f %9.2f %9.2f %9.2f %9.0f ))
;
#delimit cr
eststo clear


/////////////////
//// table 7 ////
/////////////////

use "$data/temp/temp_analysis.dta", clear

* select final sample
keep if complete_panel == 1 
eststo clear
global weight pweight_normal_final

eststo clear

// rename vars and labels
gen diff_ws_bac = dif_ws_enterbac_fc_exp_wmean 
label var diff_ws_bac "Source fecal contamination"

gen diff_dist_r = dif_ws_dist_min_wmean 
label var diff_dist_r "Travel time hh-ws (mins, reported)"

gen diff_stored = dif_source_test_stored
label var diff_stored "Observed storage"

foreach dist in dist_r {
	*With storage controls 
	reg dif_hh_enterbac_fc_exp diff_ws_bac diff_`dist' diff_stored [aweight=pweight_normal_final],  cluster(village_code) level(90)
	estimates store e_did_sto_`dist'
	estadd local fe "No"
	reghdfe dif_hh_enterbac_fc_exp diff_ws_bac diff_`dist' diff_stored [aweight=pweight_normal_final],  cluster(village_code) absorb(village_code) level(90)
	estimates store e_did_sto_`dist'_fe
	estadd local fe "Yes"
	*Without storage controls 
	reg dif_hh_enterbac_fc_exp diff_ws_bac diff_`dist' [aweight=pweight_normal_final],  cluster(village_code) level(90)
	estimates store e_did_`dist'
	estadd local fe "No"
	reghdfe dif_hh_enterbac_fc_exp diff_ws_bac diff_`dist' [aweight=pweight_normal_final],  cluster(village_code) absorb(village_code) level(90)
	estimates store e_did_`dist'_fe
	estadd local fe "Yes"
}
foreach dist in dist_r {
#delimit ;
esttab e_did*`dist'* using "$path_tables/Table7.tex", replace 
	cell(b(star fmt(3)) se(par fmt(3)) )
	label nogap number starlevels(* 0.1 ** 0.05 *** 0.01) 
	keep(diff* _cons) 
	nomtitle 
	mgroups("\specialcell{\;\;\;\;\; Drinking water fecal contamination \;\;\;\;\;}" , 
		pattern(1 0 0 0) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) 
	collabels(none) stats(fe r2 N, layout(@ @) labels("Treatment unit fixed effects" "R2" "Obs") fmt(%9.0g %9.2f %9.0f ));
;
#delimit cr
}
eststo clear


***FIGURES: main paper


////////////////////
//// figure 7a ////
////////////////////

use "$data/temp/temp_analysis.dta", clear

set scheme s1mono
rename final_distance_hh_constr_mins final_dist_hh_constr_mins

* select final sample
keep if complete_panel == 1 // final panel

twoway (lpoly water_per_day_share_cclpg final_dist_hh_constr_mins if treated == 1 & hh_arsenic_any == 0 & success_rate == 1, lcolor(gs12) lpattern(shortdash)  ) ///
	(lpoly water_per_day_share_cclpg final_dist_hh_constr_mins if treated == 1 & success_rate == 1 & hh_arsenic_any == 1 & hh_arsenic_50plus == 0,  lcolor(gs6) lpattern(longdash) ) ///
	(lpoly water_per_day_share_cclpg final_dist_hh_constr_mins if  treated == 1 & success_rate == 1 & hh_arsenic_any == 1 & hh_arsenic_50plus == 1, lcolor(black) lpattern(solid)), ///
	xtitle("Walking dist from installed source in minutes") ytitle("Share of household water from project source") ///
	legend(order(- "Household baseline As:" 1 "None" 2 "Low" 3 "High") row(2) nocolfirst holes(2 3))
graph export "$path_graphs/cocciolo_fig7a.tif", replace as(tif)  


////////////////////
//// figure 7b ////
////////////////////

*Self-reported reasons for non-use 
use "$data/temp/temp_analysis.dta", clear

set scheme s1mono
rename final_distance_hh_constr_mins final_dist_hh_constr_mins
rename fu_no_use_cclpg_tw_* no_use_*
rename no_use_cclpg_tw_* no_use_*

* select final sample
keep if complete_panel == 1 // final panel

global vars no_use_safe_1 no_use_arsenic_1 no_use_far_1 no_use_queue_1 no_use_location_1  no_use_access_1 no_use_access_time_1 no_use_access_freq_1 no_use_access_season_1 no_use_fee_1  no_use_private_1 no_use_female_1

foreach var of global vars {
	capture drop `var'_d
	tostring `var' , replace
	gen `var'_d = .
	replace `var'_d = 1 if `var' == "agree" | `var' == "strong_agree"
	replace `var'_d = 0 if `var' == "disagree" | `var' == "strong_disagree" | `var' == "neither"
}

foreach var of varlist no_use_access*1_d no_use_fee_1_d  no_use_private_1_d no_use_female_1_d {
	replace `var' = 0 if `var' == . & fu_use_cclpg_tw_1 == "no"
} 

gen no_use_access_all_1_d = max( no_use_access_1_d, no_use_access_time_1_d, no_use_access_freq_1_d, no_use_access_season_1_d)


global varlist no_use_female_1_d no_use_safe_1_d no_use_arsenic_1_d no_use_far_1_d no_use_queue_1_d no_use_access_all_1_d no_use_fee_1_d no_use_private_1_d 


local lab_no_use_female_1_d "Women uncomfortable" 
local lab_no_use_private_1_d "Well is on private land" 
local lab_no_use_fee_1_d "Landowner charges a fee" 
local lab_no_use_access_all_1_d "Landowner restricts access" 
local lab_no_use_queue_1_d "Waiting time too long" 
local lab_no_use_far_1_d "Too far from household" 
local lab_no_use_safe_1_d "Our own well is safe"  
local lab_no_use_arsenic_1_d "Not worried about As"

local reason_vars = ""

foreach var of global varlist {

	sum `var'  if treated == 1 & success_rate == 1 & final_dist_hh_constr_mins < 6
	
	if r(mean) > 0.05 {
		local reason_vars = "`reason_vars' `var'"
	}

}

local graph_command = ""
local text_command = ""

capture drop tempvar
gen tempvar = _n if _n <= 6

foreach var of local reason_vars {

	capture drop x y
	
	disp "`var'"
	
	lpoly `var' final_dist_hh_constr_mins if treated == 1 & success_rate == 1 & final_dist_hh_constr_mins < 6, bwidth(0.7)  at(tempvar) gen(x y) nodraw	
	sum y if x == 6 
	local `var'_y = r(mean)
	
	capture drop x y 
	
	
	local graph_command = "`graph_command' (lpoly `var' final_dist_hh_constr_mins if treated == 1 & success_rate == 1 & final_dist_hh_constr_mins < 6, bwidth(0.7)) "
	
	local text_command "`text_command'  text(``var'_y' 6  `"`lab_`var''"', size(small) place(e)) " 

}

drop tempvar

twoway `graph_command', `text_command' xscale(range(0 8)) legend(off) xtitle("Walking distance from installed source in minutes") ytitle("Share of non-users agreeing with statement") name(reasons, replace)

graph export "$path_graphs/cocciolo_fig7b.tif", replace as(tif)  



////////////////////
//// figure 8a ////
////////////////////
use "$data/temp/temp_analysis.dta", clear

set scheme s1mono
rename dif_hh_arsenic_field_test_result dif_hh_arsenic_field_res 

* select final sample
keep if complete_panel == 1 // final panel


cumul dif_hh_arsenic_field_res if treated == 0 [aweight=pweight_normal_final] , gen(ars_control_cumul) equal
	label var ars_control_cumul "Empirical CDF"
cumul dif_hh_arsenic_field_res if treated == 1 [aweight=pweight_normal_final], gen(ars_treated_cumul) equal
	label var ars_treated_cumul "Empirical CDF"
twoway (line ars_treated_cumul dif_hh_arsenic_field_res if treated ==1 , sort) (line ars_control_cumul dif_hh_arsenic_field_res if treated ==0 , sort), legend(order(1 "Treated" 2 "Control")) name(dif_ars_cdf_by_treatment, replace) xtitle("Change in water source As contamination (ppb)") xlabel(-500(100)500) scheme(s1mono)
graph export "$path_graphs/cocciolo_fig8a.tif", replace as(tif)  


////////////////////
//// figure 8b ////
////////////////////
use "$data/temp/temp_analysis.dta", clear

set scheme s1mono

* select final sample
keep if complete_panel == 1 // final panel


cumul dif_ws_arsenic_res_wmean if treated == 0 [aweight=pweight_normal_final] , gen(ars_ws_control_cumul) equal
	label var ars_ws_control_cumul "Empirical CDF"
cumul dif_ws_arsenic_res_wmean if treated == 1 [aweight=pweight_normal_final], gen(ars_ws_treated_cumul) equal
	label var ars_ws_treated_cumul "Empirical CDF"
twoway (line ars_ws_treated_cumul dif_ws_arsenic_res_wmean if treated ==1 , sort) (line ars_ws_control_cumul dif_ws_arsenic_res_wmean if treated == 0 , sort), legend(order(1 "Treated" 2 "Control")) name(dif_ars_cdf_ws_by_treatment, replace) xtitle("Change in household As contamination (ppb)") xlabel(-500(100)500) scheme(s1mono)
graph export "$path_graphs/cocciolo_fig8b.tif", replace as(tif)  



///////////////////////////////////////////////
//// figure 9a and figures S17.1 and S17.2 ////
///////////////////////////////////////////////


use "$data/temp/temp_analysis.dta", clear

* select final sample
keep if complete_panel == 1 
eststo clear
global weight pweight_normal_final

rename  ws_enterbac_fc_exp_wmean ws_bac_exp_wmean
rename  *ws_enterbac_fc_exp_wmean *ws_bac_exp_wmean
rename final_distance_hh_constr_mins final_dist_hh_constr_mins
rename ws_arsenic_50plus_wmean ws_arsenic_50_wmean

//Goal: simulate 6 scenarios

//3 scenarios for take-up: observed take-up, 50% increase in take-up, doubling of take-up
//2 scenarios for fecal contamination in project wells: observed or free of fecal contaminaton 

//Model plausible mode for increased take-up: those households who partially adopt the well, collect more water from the well

//Simulate three outcomes: water source contamination for As and FC, household contamination for FC. 

//Step 1: Simulate increased take-up.  Simplest approach: proportionally increase take-up among adopters, capping adoption (obviously) at 100%.  

//To achieve take-up at X factor, first multiply take-up by X, then cap adoption at 100%, repeat until you achieve total mean take-up at X times observed take-up.

//Store true mean of take-up

sum water_per_day_share_cclpg [aweight = $weight], meanonly
local truemean = r(mean)

//Calculate share not from CCLPG wells 

gen water_p_d_share_non_cclpg = 1 - water_per_day_share_cclpg

//Simulate for factors 1.5 and 2, denote simulations 1 and 2

forvalues i = 1(1)2 {

	*create variable to store simulated take-up

	capture drop wpd_share_cclpg_tusim_`i'
	gen wpd_share_cclpg_tusim_`i' = water_per_day_share_cclpg

	*initiate
	
	sum wpd_share_cclpg_tusim_`i' [aweight = $weight], meanonly
	local simmean = r(mean)
	local simmean_prev = r(mean)

	local ratio = `simmean'/`truemean' 
	
	if `i' == 1 {
		local target = 1.5
		local factor = 1.5
		local threshold = 1.499	
	}
	
	else if `i' == 2 {
		local target = 2
		local factor = 2
		local threshold = 1.999	
	}

	*iterate until ratio equal to target within threshold for closeness 
	
	while `ratio' < `threshold' {

		replace wpd_share_cclpg_tusim_`i' = `factor' * wpd_share_cclpg_tusim_`i'
		replace wpd_share_cclpg_tusim_`i' = min(wpd_share_cclpg_tusim_`i',1)

		sum wpd_share_cclpg_tusim_`i' [aweight = $weight], meanonly
	
		local simmean = r(mean)

		local ratio = `simmean'/`truemean' 
		disp "`ratio'"
	
		local factor =`target'/`ratio'
		disp "`factor'"

	}	
	
	*Calculate adjustment factors by household

	capture drop wpd_share_non_cclpg_tusim_`i'
	gen wpd_share_non_cclpg_tusim_`i' = 1 - wpd_share_cclpg_tusim_`i'
	
	capture drop factor_cclpg_tusim_`i'
	gen factor_cclpg_tusim_`i' = wpd_share_cclpg_tusim_`i'/water_per_day_share_cclpg
	
	capture drop factor_non_cclpg_tusim_`i'
	gen factor_non_cclpg_tusim_`i' = wpd_share_non_cclpg_tusim_`i'/water_p_d_share_non_cclpg	
	
}

gen factor_cclpg_tusim_0 = 1
gen factor_non_cclpg_tusim_0 = 1

//Simulate water source level contamination, for base rates, 50% increase and 100% increase, denote tusim 0, 1, and 2 respectively

//Simulate: i) arsenic, ii) fecal contamination, and iii) fecal contamination if all project wells were free of contamination

forvalues t = 0(1)2 {

	foreach i of numlist 1(1)4 {
	
		*calculate simulated shares from each water source

		capture drop fu_wpd_share_ws`i'_tusim_`t'
		gen fu_wpd_share_ws`i'_tusim_`t' = fu_water_per_day_share_ws`i' *  factor_cclpg_tusim_`t' if fu_ws`i'_project_tw == 1 
		replace fu_wpd_share_ws`i'_tusim_`t' = fu_water_per_day_share_ws`i' *  factor_non_cclpg_tusim_`t' if fu_ws`i'_project_tw == 0
		
		*calculate weighted contamination from each water source and weighted distance (in minutes, reported)
		
		capture drop fu_ws_asany_w_ws`i'_tusim_`t'
		gen fu_ws_asany_w_ws`i'_tusim_`t' = fu_arsenic_any_ws`i' * fu_wpd_share_ws`i'_tusim_`t'
		
		capture drop fu_ws_fc_w_ws`i'_tusim_`t'
		gen fu_ws_fc_w_ws`i'_tusim_`t' = fu_enterbac_fc_exp_ws`i' * fu_wpd_share_ws`i'_tusim_`t'
		
		capture drop fu_ws_fc_sim_w_ws`i'_tusim_`t'
		gen fu_ws_fc_sim_w_ws`i'_tusim_`t' = fu_ws_fc_w_ws`i'_tusim_`t' * (1-fu_ws`i'_project_tw)
		
		capture drop fu_ws_dist_r_w_ws`i'_tusim_`t'
		gen fu_ws_dist_r_w_ws`i'_tusim_`t' = fu_dist_min_ws`i' * fu_wpd_share_ws`i'_tusim_`t'
	
	}
	
	*calculate weighted means foreach variable
	
	foreach v in asany fc fc_sim dist_r {
	
		capture drop fu_ws_`v'_wm*_tusim_`t'
	
		egen fu_ws_`v'_wm1_tusim_`t' = rowtotal(fu_ws_`v'_w_ws*_tusim_`t') if fu_ws_`v'_w_ws1_tusim_`t'!=. 
		egen fu_ws_`v'_wm2_tusim_`t' = rowtotal(fu_ws_`v'_w_ws*_tusim_`t') if fu_ws_`v'_w_ws1_tusim_`t'!=. & fu_ws_`v'_w_ws2_tusim_`t' !=. & fu_num_water_source==2
		egen fu_ws_`v'_wm3_tusim_`t' = rowtotal(fu_ws_`v'_w_ws*_tusim_`t') if fu_ws_`v'_w_ws1_tusim_`t'!=. & fu_ws_`v'_w_ws2_tusim_`t' !=. & fu_ws_`v'_w_ws3_tusim_`t' !=. & fu_num_water_source==3
		egen fu_ws_`v'_wm4_tusim_`t' = rowtotal(fu_ws_`v'_w_ws*_tusim_`t') if fu_ws_`v'_w_ws1_tusim_`t'!=. & fu_ws_`v'_w_ws2_tusim_`t' !=. & fu_ws_`v'_w_ws3_tusim_`t' !=. & fu_ws_`v'_w_ws4_tusim_`t' !=. & fu_num_water_source==4
			
		gen fu_ws_`v'_wm_tusim_`t' =  fu_ws_`v'_wm1_tusim_`t' if fu_num_water_source==1
		replace fu_ws_`v'_wm_tusim_`t' = fu_ws_`v'_wm2_tusim_`t' if fu_num_water_source==2
		replace fu_ws_`v'_wm_tusim_`t' = fu_ws_`v'_wm3_tusim_`t' if fu_num_water_source==3
		replace fu_ws_`v'_wm_tusim_`t' = fu_ws_`v'_wm4_tusim_`t' if fu_num_water_source==4

	}

	gen dif_ws_asany_wm_tusim_`t' = fu_ws_asany_wm_tusim_`t' - ws_arsenic_any_wmean
	gen dif_ws_fc_wm_tusim_`t' = fu_ws_fc_wm_tusim_`t' - ws_bac_exp_wmean
	gen dif_ws_fc_sim_wm_tusim_`t' = fu_ws_fc_sim_wm_tusim_`t' - ws_bac_exp_wmean
	gen dif_ws_dist_r_wm_tusim_`t' = fu_ws_dist_r_wm_tusim_`t' - ws_dist_min_wmean

}
	
//Estimate results for water source contamination 


mat dif_ws_sim_ests = J(3,4,.)

mat dif_ws_sim_ests[1,1] = 1
mat dif_ws_sim_ests[2,1] = 1.5
mat dif_ws_sim_ests[3,1] = 2

local i = 2

foreach v in asany fc fc_sim  {

	forvalues t = 0(1)2 {

		quiet reg_centered dif_ws_`v'_wm_tusim_`t' treated [aweight = $weight], cluster(village_code) control(lu_*)
		estimates store e_dif_ws_`v'_wm_tusim_`t'
		
		local j = `t' + 1
		
		mat dif_ws_sim_ests[`j',`i'] = _b[treated]
		
	}
	
	local i = `i' + 1

}

*plot results 

svmat dif_ws_sim_ests

format dif_ws_sim_ests2 dif_ws_sim_ests3 dif_ws_sim_ests4 %04.3f

twoway (scatter dif_ws_sim_ests2 dif_ws_sim_ests1, msymbol(square) mcolor(black) mlabel(dif_ws_sim_ests2) ) (scatter dif_ws_sim_ests3 dif_ws_sim_ests1, msymbol(circle) mcolor(gs12) mlabel(dif_ws_sim_ests3)) (scatter dif_ws_sim_ests4 dif_ws_sim_ests1, msymbol(circle) mcolor(gs6) mlabel(dif_ws_sim_ests4)), xlabel(1 "Observed takeup" 1.5 "50% increase" 2 "100% increase") yscale(range(0 -0.1)) ylabel(-0.1(0.02)0, labsize(small)  angle(0)) xscale(range(0.7 2.3))  xtitle("Modelled takeup") ytitle("Simulated effect") legend(order(1 "Effect on arsenic at WHO threshold" 2 "Effect on fecal contamination, observed contamination in project wells" 3 "Effect on fecal contamination, project wells free of fecal contamination") col(1) size(small)) name(source_effects, replace)

graph export "$path_graphs/cocciolo_fig9a.tif", replace as(tif)  

*calculate elasticities 

*arsenic 

disp dif_ws_sim_ests2[3]/dif_ws_sim_ests2[1] 

*fecal contamination with observed project well contamination 

disp dif_ws_sim_ests3[3]/dif_ws_sim_ests3[1] 

*fecal contamination if project wells free from contamination 

disp dif_ws_sim_ests4[3]/dif_ws_sim_ests4[1] 

*estimate effects on travel distance 

mat dif_tr_sim_ests = J(3,2,.)

mat dif_tr_sim_ests[1,1] = 1
mat dif_tr_sim_ests[2,1] = 1.5
mat dif_tr_sim_ests[3,1] = 2

local i = 2

foreach v in dist_r  {

	forvalues t = 0(1)2 {

		quiet reg_centered dif_ws_`v'_wm_tusim_`t' treated [aweight = $weight], cluster(village_code) control(lu_*)
		estimates store e_dif_ws_`v'_wm_tusim_`t'
		
		local j = `t' + 1
		
		mat dif_tr_sim_ests[`j',`i'] = _b[treated]
		
	}
	
	local i = `i' + 1

}

*plot results 

svmat dif_tr_sim_ests

format dif_tr_sim_ests2 %04.3f

twoway (scatter dif_tr_sim_ests2 dif_tr_sim_ests1, msymbol(square) mcolor(black) mlabel(dif_tr_sim_ests2) ), xlabel(1 "Observed takeup" 1.5 "50% increase" 2 "100% increase") yscale(range(0 0.2)) ylabel(0(0.05)0.2, labsize(small)  angle(0)) xscale(range(0.7 2.3))  xtitle("Modelled takeup") ytitle("Simulated change in distance") legend(off) name(dist_effects, replace)

graph export "$path_graphs/dist_effects.pdf", replace 


*plot simulated take-up patterns

gen wpd_share_cclpg_tusim_0 = water_per_day_share_cclpg

local subtitle_tusim_0 "a) Observed take-up"
local subtitle_tusim_1 "b) 50% increase in take-up"
local subtitle_tusim_2 "c) 100% increase in take-up"


forvalues t = 0(1)2 {

	twoway (lpoly wpd_share_cclpg_tusim_`t' final_dist_hh_constr_mins if treated == 1 & hh_arsenic_any == 0 & success_rate == 1, lcolor(gs12) lpattern(shortdash)) (lpoly wpd_share_cclpg_tusim_`t' final_dist_hh_constr_mins if treated == 1 & success_rate == 1 & hh_arsenic_any == 1 & hh_arsenic_50plus == 0 , lcolor(gs6) lpattern(longdash)) (lpoly wpd_share_cclpg_tusim_`t' final_dist_hh_constr_mins if  treated == 1 & success_rate == 1 & hh_arsenic_any == 1 & hh_arsenic_50plus == 1 , lcolor(black) lpattern(solid)), subtitle("`subtitle_tusim_`t''") xtitle("Walking dist from installed source in minutes") ytitle("Share of drinking/cooking" "water from project source") legend(order(- "Household baseline As:" 1 "None" 2 "Low" 3 "High") rows(1) size(vsmall)) graphregion(fcolor(white)) xlabel(, nogrid) ylabel(0(0.2)1, nogrid) name(takeup_with_dist_by_As_tusim_`t', replace)
 
}

grc1leg takeup_with_dist_by_As_tusim_0 takeup_with_dist_by_As_tusim_1 takeup_with_dist_by_As_tusim_2, cols(1) name(sim_takeup, replace)

graph display sim_takeup, xsize(3.5) ysize(6)

graph export "$path_graphs/sim_takeup.pdf", replace

*relationship between take-up and baseline variables

foreach var in ws_arsenic_any_wmean ws_arsenic_50_wmean ws_bac_exp_wmean final_dist_hh_constr_mins {

	forvalues t = 0(1)2 {

		disp "`var', take-up simulation `t'"
	
		reg `var' wpd_share_cclpg_tusim_`t' if success_rate > 0 &  success_rate != . [aweight=pweight_normal_final],  cluster(village_code)
		
	}

}

///////////////////
//// figure 9b ////
///////////////////


*what would household contamination have been?

*estimate model 

//Gen shortnamed variables for regression

gen diff_ws_bac = dif_ws_bac_exp_wmean 
label var diff_ws_bac "Source fecal contamination"

gen diff_dist_r = dif_ws_dist_min_wmean 
label var diff_dist_r "Travel time in minutes, reported"


*estimate model for true parameters.  NB: use model that does not control for storage (implicitly, any correlated changes in storage behaviour load onto changes in distance)

reg dif_hh_enterbac_fc_exp diff_ws_bac diff_dist_r [aweight=pweight_normal_final],  cluster(village_code)

*simulate household contamination under i) three takeup scenarios, ii) two source contamination scenarios and iii) in each case accounting for and not accounting for changes in distance

gen dif_hh_fc_resid = dif_hh_enterbac_fc_exp - _b[diff_ws_bac]*(diff_ws_bac) - _b[diff_dist_r]* diff_dist_r

forvalues t = 0(1)2 {

	gen dif_hh_fc_tusim_`t'_nd = dif_hh_fc_resid + _b[diff_ws_bac]* dif_ws_fc_wm_tusim_`t'
	gen dif_hh_fc_sim_tusim_`t'_nd = dif_hh_fc_resid + _b[diff_ws_bac]* dif_ws_fc_sim_wm_tusim_`t'
	
	gen dif_hh_fc_tusim_`t'_d = dif_hh_fc_resid + _b[diff_ws_bac]* dif_ws_fc_wm_tusim_`t' + _b[diff_dist_r] * dif_ws_dist_r_wm_tusim_`t'
	gen dif_hh_fc_sim_tusim_`t'_d = dif_hh_fc_resid + _b[diff_ws_bac]* dif_ws_fc_sim_wm_tusim_`t'+ _b[diff_dist_r]* dif_ws_dist_r_wm_tusim_`t'
	
}


*estimate simulated effects

mat dif_hh_sim_ests = J(3,5,.)

mat dif_hh_sim_ests[1,1] = 1
mat dif_hh_sim_ests[2,1] = 1.5
mat dif_hh_sim_ests[3,1] = 2

local i = 2

foreach v in fc fc_sim  {

	foreach d in d nd {
	
		forvalues t = 0(1)2 {

			quiet reg_centered dif_hh_`v'_tusim_`t'_`d' treated [aweight = $weight], cluster(village_code) control(lu_*)
			estimates store e_dif_hh_`v'_tusim_`t'_`d'
		
			local j = `t' + 1
		
			mat dif_hh_sim_ests[`j',`i'] = _b[treated]
		
		}
		
		local i = `i' + 1
	
	}
	
}

svmat dif_hh_sim_ests 

format dif_hh_sim_ests2 dif_hh_sim_ests3 dif_hh_sim_ests4 dif_hh_sim_ests5 %04.3f

twoway (scatter dif_hh_sim_ests2 dif_hh_sim_ests1, msymbol(circle) mcolor(gs12) mlabel(dif_hh_sim_ests2)) (scatter dif_hh_sim_ests3 dif_hh_sim_ests1, msymbol(diamond) mcolor(gs12) mlabel(dif_hh_sim_ests3)) (scatter dif_hh_sim_ests4 dif_hh_sim_ests1, msymbol(circle) mcolor(gs6) mlabel(dif_hh_sim_ests4))  (scatter dif_hh_sim_ests5 dif_hh_sim_ests1, msymbol(diamond) mcolor(gs6) mlabel(dif_hh_sim_ests5)), xlabel(1 "Observed takeup" 1.5 "50% increase" 2 "100% increase") ylabel(-0.01(0.005)0.01, labsize(small) angle(0)) yscale(range(0.01 -0.01))  xscale(range(0.7 2.3)) xtitle("Modelled takeup") ytitle("Simulated effect") legend(order(- "Observed fecal contamination in project" - "wells:" 1 "accounting for change in distance" 2 "not accounting for change in distance" - "If project wells free from fecal" - "contamination:" 3 "accounting for change in distance" 4 "not accounting for change in distance") col(2) colfirst size(small)) name(household_effects, replace)

graph export "$path_graphs/cocciolo_fig9b.tif", replace as(tif)  


***TABLES and FIGURES: Appendixes 


///////////////////
//// table S1.3 ////
///////////////////
use "$data/temp/temp_analysis.dta", clear
keep if cclpg_sample==1 

* set globals and keep relevant vars
rename network_nominations network_nom
global balance_vars hh_arsenic_any hh_arsenic_50plus hh_enterbac_fc num_hh_m poverty_score_2usd no_educ_rate read_rate ///
	network_nom network_size muslim v_r_trust_v_much know_ass_ind // leader_hh 
keep $balance_vars success_rate installation_attempt_rate lu_* village_code pweight_b
label var no_educ_rate "Not educated HH members (\%)"

* regressions
rename success_rate s_r
rename installation_attempt_rate ia_r
foreach var of global balance_vars { 
	global lab_`var' : var label `var'
}
foreach var of global balance_vars {
	foreach j of varlist s_r ia_r {
		reg_centered `var' `j' [aweight = pweight_b] , control( lu_* ) cluster(village_code)
		global N_`var' = e(N) 
		gen `var'_beta_`j' = trim("`: display %9.2f _b[`j']'")
		tostring `var'_beta_`j', replace
		global `var'_beta_`j' = trim("`: display %9.2f _b[`j']'") 
		global `var'_se_`j' = trim("`: display %9.2f _se[`j']'") 
		global pvalue = (2 * ttail(e(df_r), abs(_b[`j']/_se[`j'])))
		if $pvalue < 0.01 {
			replace `var'_beta_`j' = "`: display ${`var'_beta_`j'}'" + "***"
		}
		if $pvalue < 0.05 & $pvalue>=0.01 {
			replace `var'_beta_`j' = "`: display ${`var'_beta_`j'}'" + "**"
		}
		if $pvalue < 0.1 & $pvalue>=0.05 {
			replace `var'_beta_`j' = "`: display ${`var'_beta_`j'}'" + "*"
		}
		global `var'_beta_`j' = `var'_beta_`j'
		drop `var'_beta_`j'
	}
}
* export table 
texdoc do "$dofiles/texdoc/table_installations_selection.do"
**creates table_installations_selection.tex


///////////////////
//// table S6.1 ////
///////////////////
use "$data/temp/temp_analysis.dta", clear

global vars num_hh_m hh_head_muslim hh_head_no_educ own_livestock_dummy own_agri_dummy own_agri_qty_acre ///
	toilet_MICS_dummy rooms_sleep mat_floor_dummy mat_roof_dummy hh_a_mobile_dummy asset_motor_dummy

foreach var of global vars { 
	global lab_`var' : var label `var' 
	reg `var' cclpg_sample mics_sample [aweight=pweight_b], nocons cluster(village_code)
	foreach c of newlist cclpg mics {
		global `var'_c_`c'= trim("`: display %9.2g _b[`c'_sample]'")
		global `var'_sd_`c' = trim("`: display %9.3f _se[`c'_sample]'")
	}
}
texdoc do "$dofiles/texdoc/descr_socioeconomic.do"
***Creates socioeconomic_stats.tex

///////////////////
//// table S7.1 ////
///////////////////

use "$data/temp/temp_analysis.dta", clear
keep if cclpg_sample==1 

global treatments control cash labour waiver
global vars num_hh_m hh_head_muslim hh_head_no_educ own_livestock_dummy own_agri_dummy own_agri_qty_acre ///
	toilet_MICS_dummy rooms_sleep mat_floor_dummy mat_roof_dummy hh_a_mobile_dummy asset_motor_dummy

foreach var of global vars { 
	global lab_`var' : var label `var'
	sum `var'
	global N_`var' = r(N)
	*
	xi: reg `var' control cash labour waiver i.union_name [aweight=pweight_b] , nocons cluster(village_code)
	foreach t of global treatments {
		global `var'_m_`t' = trim("`: display %9.2g _b[`t']'")
		global `var'_sd_`t' = trim("`: display %9.3f _se[`t']'") 
	}
	foreach c of varlist cash labour waiver {
		lincom _b[control] - _b[`c']
		pvalue
		gen `var'_m_`c'="${`var'_m_`c'}"
		tostring `var'_m_`c', replace
		if $pvalue < 0.01 {
			replace `var'_m_`c' = "`: display ${`var'_m_`c'}'" + "***"
		}
		if $pvalue < 0.05 & $pvalue>=0.01 {
			replace `var'_m_`c' = "`: display ${`var'_m_`c'}'" + "**"
		}
		if $pvalue < 0.1 & $pvalue>=0.05 {
			replace `var'_m_`c' = "`: display ${`var'_m_`c'}'" + "*"
		}
		global `var'_m_`c' = `var'_m_`c'
		drop `var'_m_`c'
	}
	*
	xi: reg `var' control treated i.union_name [aweight=pweight_b] , nocons cluster(village_code)
	global `var'_m_t = trim("`: display %9.2g _b[treated]'")
	global `var'_sd_t = trim("`: display %9.3f _se[treated]'") 
	lincom _b[control] - _b[treated]
	pvalue
	gen `var'_m_t="${`var'_m_t}"
	tostring `var'_m_t, replace
	if $pvalue < 0.01 {
		replace `var'_m_t = "`: display ${`var'_m_t}'" + "***"
	}
	if $pvalue < 0.05 & $pvalue>=0.01 {
		replace `var'_m_t = "`: display ${`var'_m_t}'" + "**"
	}
	if $pvalue < 0.1 & $pvalue>=0.05 {
		replace `var'_m_t = "`: display ${`var'_m_t}'" + "*"
	}
	global `var'_m_t = `var'_m_t
	drop `var'_m_t
}
* hotelling
preserve
collapse $vars cash labour waiver treated control, by(village_code)
foreach c of varlist cash labour waiver treated {
	hotelling $vars if `c'==1 | control==1, by(`c')
	global Ftest =  ((r(N)-r(k)-1)/((r(N)-2)*(r(k)))) * r(T2)
	global hot_pvalue = Ftail(r(k),r(df),$Ftest)
	global hot_pvalue_`c' = trim("`: display %9.3f Ftail(r(k),r(df),$Ftest) '")
}
restore
* test for joint significance
foreach c of varlist cash labour waiver treated {
	xi: reg `c' $vars i.union_name [aweight=pweight_b] if `c'==1 | control==1, cluster(village_code)
	test $vars
	global joint_pvalue_`c' = trim("`: display %9.3f r(p)'")
}
* export table 
texdoc do "$dofiles/texdoc/balance_socioeconomic.do"
***creates socioeconomic_balance.tex

///////////////////
//// table S7.2 ////
///////////////////

use "$data/temp/temp_analysis.dta", clear
keep if cclpg_sample==1 

global treatments control cash labour waiver

rename ws_enterbac_fc_exp_wmean ws_fc_exp_wmean
rename dif_ws_arsenic_50plus_wmean dif_ws_ars_50_wmean
rename ws_arsenic_50plus_wmean ws_ars_50_wmean  
global vars2 hh_arsenic_any hh_arsenic_50plus hh_enterbac_fc_exp ws_arsenic_any_wmean ws_ars_50_wmean ws_fc_exp_wmean ws_treat_d_wmean /// 	
	source_test_stored collect_water_mins collect_per_day_qty
	
	label var ws_arsenic_any_wmean "WS arsenic contamination (WHO)"
	label var ws_ars_50_wmean "WS arsenic contamination (BD)"
	label var ws_fc_exp_wmean "WS fecal contamination"
	label var hh_enterbac_fc_exp "Fecal contamination (HH test)"
	label var ws_treat_d_wmean "The water is treated to make it safe for drinking"
	label var source_test_stored "Household observed to store drinking water"
	
	
foreach var of global vars2 { 
	global lab_`var' : var label `var'
	sum `var'
	global N_`var' = r(N)
	*
	xi: reg `var' control cash labour waiver i.union_name [aweight=pweight_b] , nocons cluster(village_code)
	foreach t of global treatments {
		global `var'_m_`t' = trim("`: display %9.2g _b[`t']'")
		global `var'_sd_`t' = trim("`: display %9.3f _se[`t']'") 
	}
	foreach c of varlist cash labour waiver {
		lincom _b[control] - _b[`c']
		pvalue
		gen `var'_m_`c'="${`var'_m_`c'}"
		tostring `var'_m_`c', replace
		if $pvalue < 0.01 {
			replace `var'_m_`c' = "`: display ${`var'_m_`c'}'" + "***"
		}
		if $pvalue < 0.05 & $pvalue>=0.01 {
			replace `var'_m_`c' = "`: display ${`var'_m_`c'}'" + "**"
		}
		if $pvalue < 0.1 & $pvalue>=0.05 {
			replace `var'_m_`c' = "`: display ${`var'_m_`c'}'" + "*"
		}
		global `var'_m_`c' = `var'_m_`c'
		drop `var'_m_`c'
	}
	*
	xi: reg `var' control treated i.union_name [aweight=pweight_b] , nocons cluster(village_code)
	global `var'_m_t = trim("`: display %9.2g _b[treated]'")
	global `var'_sd_t = trim("`: display %9.3f _se[treated]'") 
	lincom _b[control] - _b[treated]
	pvalue
	gen `var'_m_t="${`var'_m_t}"
	tostring `var'_m_t, replace
	if $pvalue < 0.01 {
		replace `var'_m_t = "`: display ${`var'_m_t}'" + "***"
	}
	if $pvalue < 0.05 & $pvalue>=0.01 {
		replace `var'_m_t = "`: display ${`var'_m_t}'" + "**"
	}
	if $pvalue < 0.1 & $pvalue>=0.05 {
		replace `var'_m_t = "`: display ${`var'_m_t}'" + "*"
	}
	global `var'_m_t = `var'_m_t
	drop `var'_m_t
}
* hotelling
preserve
collapse $vars2 cash labour waiver treated control , by(village_code)
foreach c of varlist cash labour waiver treated {
	hotelling $vars2 if `c'==1 | control==1, by(`c')
	global Ftest =  ((r(N)-r(k)-1)/((r(N)-2)*(r(k)))) * r(T2)
	global hot_pvalue = Ftail(r(k),r(df),$Ftest)
	global hot_pvalue_`c' = trim("`: display %9.3f Ftail(r(k),r(df),$Ftest) '")
}
restore
* test for joint significance
foreach c of varlist cash labour waiver treated {
	xi: reg `c' $vars2 i.union_name [aweight=pweight_b] if `c'==1 | control==1 , cluster(village_code)
	test $vars2
	global joint_pvalue_`c' = trim("`: display %9.3f r(p)'")
}
* export table 
texdoc do "$dofiles/texdoc/balance_water.do"	
***creates water_balance.tex


////////////////////
//// table S11.1 ////
////////////////////

preserve
do  "$dofiles/power_calcs_annotated.do"
restore

////////////////////
//// table S11.1 ////
////////////////////

* IV on use of wells

use "$data/temp/temp_analysis.dta", clear

* select final sample
keep if complete_panel == 1


foreach v of varlist  fu_ws_any_project_tw water_per_day_share_cclpg{
	eststo m_`v' : ivreg2 `v' lu_* (inst_per_hh = cash labour waiver) [aweight=pweight_normal_final] , cluster(village_code) ffirst level(90)
	mat A = e(first)
	estadd scalar first_F_AP = A[15,1]	
}
* export table
#delimit ;
esttab m_* using "$path_tables/policy_impact_takeup_iv.tex", replace 
	cell(b(star fmt(3)) se(par fmt(3)) )
	label nogap number starlevels(* 0.1 ** 0.05 *** 0.01) 
	keep(inst_per_hh ) 
	nomtitle 
	mgroups("\specialcell{Uses any \\ project well\\}" "\specialcell{Fraction \\ water from \\ project well}", 
		pattern(1 1) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) 
	collabels(none) stats(first_F_AP r2 N, layout(@ @) labels("First stage F-statistic" "R$^2$" "N") fmt(%9.1f %9.2f %9.0f ))
;
#delimit cr
eststo clear


////////////////////
//// table S11.2 ////
////////////////////
* IV on installed wells per household

use "$data/temp/temp_analysis.dta", clear

* select final sample
keep if complete_panel == 1 // final panel

rename dif_ws_enterbac_fc_exp_wmean dif_ws_bac_exp_wmean

foreach v of varlist  dif_ws_arsenic_any_wmean dif_ws_bac_exp_wmean{
	eststo m_`v' : ivreg2 `v' lu_* (inst_per_hh = cash labour waiver) [aweight=pweight_normal_final] , cluster(village_code) ffirst level(90)
	mat A = e(first)
	estadd scalar first_F_AP = A[15,1]	
}
* export table
#delimit ;
esttab m_* using "$path_tables/policy_impact_ws_iv.tex", replace 
	cell(b(star fmt(3)) se(par fmt(3)) )
	label nogap number starlevels(* 0.1 ** 0.05 *** 0.01) 
	keep(inst_per_hh ) 
	nomtitle 
	mgroups("\specialcell{\;\;\;\; Arsenic \\ contamination \\ \;\;\;\; (WHO \\ \;\; threshold)}" "\specialcell{\;\;\;\;\;\; Fecal \\ contamination}", 
		pattern(1 1) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) 
	collabels(none) stats(first_F_AP r2 N, layout(@ @) labels("First stage F-statistic" "R$^2$" "N") fmt(%9.1f %9.2f %9.0f ))
;
#delimit cr
eststo clear



////////////////////
//// table S11.3 ////
////////////////////
* IV on installed wells per household
use "$data/temp/temp_analysis.dta", clear
* select final sample
keep if complete_panel == 1 // final panel

rename *_ws_hh_distance_final_wmean  *_ws_hh_dist_f_wmean

foreach v of varlist  dif_ws_hh_dist_f_wmean dif_ws_dist_min_wmean dif_source_test_stored dif_water_storage_d {
	eststo m_`v' : ivreg2 `v' lu_* (inst_per_hh = cash labour waiver) [aweight=pweight_normal_final] , cluster(village_code) ffirst
	mat A = e(first)
	estadd scalar first_F_AP = A[15,1]	
}
* export table
#delimit ;
esttab m_* using "$path_tables/policy_impact_practice_iv.tex", replace 
	cell(b(star fmt(3)) se(par fmt(3)) )
	label nogap number starlevels(* 0.1 ** 0.05 *** 0.01) 
	keep(inst_per_hh ) 
	nomtitle 
	mgroups("\specialcell{\;Distance \\ HH-WS (m)}" "\specialcell{\;\;\;Distance \\ HH-WS (min)}" "\specialcell{Observed \\ \; storage}"
		"\specialcell{Reported \\ \; storage}",
		pattern(1 1 1 1) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) 
	collabels(none) stats(first_F_AP r2 N, layout(@ @) labels("First stage F-statistic" "R$^2$" "N") fmt(%9.1f %9.2f %9.0f ))
;
#delimit cr
eststo clear


////////////////////
//// table S11.4 ////
////////////////////

* IV for installed wells per household
use "$data/temp/temp_analysis.dta", clear
* select final sample
keep if complete_panel == 1 // final panel

foreach v of varlist dif_hh_arsenic_any dif_hh_enterbac_fc_exp {
	eststo m_`v' : ivreg2 `v' lu_* (inst_per_hh = cash labour waiver) [aweight=pweight_normal_final] , cluster(village_code) ffirst
	mat A = e(first)
	estadd scalar first_F_AP = A[15,1]
}

* export table
#delimit ;
esttab m_* using "$path_tables/policy_impact_hh_iv.tex", replace 
	cell(b(star fmt(3)) se(par fmt(3)) )
	label nogap number starlevels(* 0.1 ** 0.05 *** 0.01) 
	keep(inst_per_hh) 
	nomtitle 
	mgroups("\specialcell{\;\;\;\; Arsenic \\ contamination \\ \;\;\;\; (WHO \\ \;\; threshold)}" "\specialcell{\;\;\;\;\;\; Fecal \\ contamination}", 
		pattern(1 1) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) 
	collabels(none) stats(first_F_AP r2 N, layout(@ @) labels("First stage F-statistic" "R$^2$" "N") fmt(%9.1f %9.2f %9.0f ))
;
#delimit cr
eststo clear



////////////////////
//// table S12.1 ////
////////////////////
use "$data/temp/temp_analysis.dta", clear
* select final sample
keep if complete_panel == 1 // final panel

foreach v of varlist fu_ws_any_switch fu_ws_switch_share {
	eststo m_`v' :  reg_centered `v' treated [aweight = pweight_normal_final], cluster(village_code) control(lu_*)
}

* export table
esttab m_* using "$path_tables/switching.tex", replace cell(b(star fmt(3)) se(par fmt(3)) ) label nogap number starlevels(* 0.1 ** 0.05 *** 0.01) keep(treated _cons) nomtitle mgroups("\specialcell{Uses any \\ new source}" "\specialcell{Fraction \\ water from \\ new source }", pattern(1 1) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) collabels(none) stats(r2 N, layout(@ @) labels( "R$^2$" "N") fmt(%9.2f %9.0f ))


////////////////////
//// table S12.2a and S12.2b ////
////////////////////
*Well-switching, heterogeneity with respect to baseline As contamination
use "$data/temp/temp_analysis.dta", clear
* select final sample
keep if complete_panel == 1 // final panel

gen t_hh_as_any = treated * hh_arsenic_any
label var t_hh_as_any "Treated x baseline As > 10 ppb"

gen hh_as_any = hh_arsenic_any
label var hh_as_any "Baseline As > 10 ppb"

gen t_hh_as_50plus = treated * hh_arsenic_50plus
label var t_hh_as_50plus "Treated x baseline As > 50 ppb"

gen hh_as_50plus = hh_arsenic_50plus
label var hh_as_50plus "Baseline As > 50 ppb"

gen t_ws_as_any_wm = treated * ws_arsenic_any_wmean
label var t_ws_as_any_wm "Treated x baseline As > 10 ppb"

gen ws_as_any_wm = ws_arsenic_any_wmean
label var ws_as_any_wm "Baseline As > 10 ppb"

gen t_ws_as_50plus_wm = treated * ws_arsenic_50plus_wmean
label var t_ws_as_50plus_wm "Treated x baseline As > 50 ppb"

gen ws_as_50plus_wm = ws_arsenic_50plus_wmean
label var ws_as_50plus_wm "Baseline As > 50 ppb"

eststo clear


foreach o in any share  {

	if "`o'" == "any" {
		local outcome fu_ws_any_switch
	}

	else local outcome fu_ws_switch_share

	foreach v of varlist hh_as_any hh_as_50plus ws_as_any_wm ws_as_50plus_wm {
		eststo m_`o'_`v' :  reg_centered `outcome' `v' treated t_`v' [aweight = pweight_normal_final], cluster(village_code) control(lu_*)
	}
}

* export tables
esttab m_any*hh* m_share*hh*  using "$path_tables/switching_het_ashh.tex", replace cell(b(star fmt(3)) se(par fmt(3)) ) label nogap number starlevels(* 0.1 ** 0.05 *** 0.01) keep(treated hh_as_any hh_as_50plus t_hh_as_any t_hh_as_50plus _cons ) order(treated hh_as_any t_hh_as_any hh_as_50plus t_hh_as_50plus _cons)  nomtitle mgroups("\specialcell{Uses any \\ new source}" "\specialcell{Uses any \\ new source}" "\specialcell{Fraction \\ water from \\ new source }" "\specialcell{Fraction \\ water from \\ new source }", pattern(1 1 1 1) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) collabels(none) stats(r2 N, layout(@ @) labels( "R$^2$" "N") fmt(%9.2f %9.0f ))

esttab m_any*ws* m_share*ws*  using "$path_tables/switching_het_asws.tex", replace cell(b(star fmt(3)) se(par fmt(3)) ) label nogap number starlevels(* 0.1 ** 0.05 *** 0.01) keep(treated ws_as_any_wm ws_as_50plus_wm t_ws_as_any_wm t_ws_as_50plus_wm _cons ) order(treated ws_as_any_wm t_ws_as_any_wm ws_as_50plus_wm t_ws_as_50plus_wm _cons)  nomtitle mgroups("\specialcell{Uses any \\ new source}" "\specialcell{Uses any \\ new source}" "\specialcell{Fraction \\ water from \\ new source }" "\specialcell{Fraction \\ water from \\ new source }", pattern(1 1 1 1) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) collabels(none) stats(r2 N, layout(@ @) labels( "R$^2$" "N") fmt(%9.2f %9.0f ))



////////////////////
//// table S12.3a and S12.3b ////
////////////////////
*Effect of well-switching on access to safe water 
use "$data/temp/temp_analysis.dta", clear
* select final sample
keep if complete_panel == 1 // final panel

gen t_any_switch = treated * fu_ws_any_switch
label var t_any_switch "Treated x uses any new source" 

gen t_switch_share = treated * fu_ws_switch_share
label var t_switch_share "Treated x fraction water from new source" 

gen any_switch = fu_ws_any_switch
label var any_switch "Uses any new source" 

gen switch_share = fu_ws_switch_share
label var switch_share "Fraction water from new source" 


gen d_hh_as_50pl = dif_hh_arsenic_50plus
gen d_hh_as_any = dif_hh_arsenic_any
gen d_ws_as_50pl_wm = dif_ws_arsenic_50plus_wmean
gen d_ws_as_any_wme =  dif_ws_arsenic_any_wmean

eststo clear  

foreach o in any share  {

	if "`o'" == "any" {
		local var1 any_switch
		local var2 t_any_switch
	}

	else {
	
		local var1 switch_share
		local var2 t_switch_share
	}

	foreach v of varlist d_hh_as_any d_hh_as_50pl d_ws_as_any_wm d_ws_as_50pl_wm  {
		eststo m_`v'_het_`o' :  reg_centered `v' `var1' `var2' treated [aweight = pweight_normal_final], cluster(village_code) control(lu_*)
	}
}

* export tables
esttab m_*het_any using "$path_tables/effect_switching_any.tex", replace cell(b(star fmt(3)) se(par fmt(3)) ) label nogap number starlevels(* 0.1 ** 0.05 *** 0.01) keep(any_switch treated t_any_switch _cons ) order(any_switch  treated t_any_switch _cons)  nomtitle mgroups("\specialcell{As > 10 ppb \\ (household)}" "\specialcell{As > 50 ppb \\ (household)}" "\specialcell{As > 10 ppb \\ (water source)}" "\specialcell{As > 50 ppb \\ (water source)}", pattern(1 1 1 1) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) collabels(none) stats(r2 N, layout(@ @) labels( "R$^2$" "N") fmt(%9.2f %9.0f ))

esttab m_*het_share using "$path_tables/effect_switching_share.tex", replace cell(b(star fmt(3)) se(par fmt(3)) ) label nogap number starlevels(* 0.1 ** 0.05 *** 0.01) keep(switch_share treated t_switch_share _cons ) order(switch_share  treated t_switch_share _cons)  nomtitle mgroups("\specialcell{As > 10 ppb \\ (household)}" "\specialcell{As > 50 ppb \\ (household)}" "\specialcell{As > 10 ppb \\ (water source)}" "\specialcell{As > 50 ppb \\ (water source)}", pattern(1 1 1 1) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) collabels(none) stats(r2 N, layout(@ @) labels( "R$^2$" "N") fmt(%9.2f %9.0f ))

/////////////////
///table S12.4 /////
/////////////////

use  "$data/temp/temp_ws_panel_anonymized.dta", clear

eststo clear

gen treated_post = treated * post

label var post "Post"
label var treated_post "Treated x post"

*estimates

foreach v of varlist ws_arsenic_any ws_arsenic_50plus  ws_arsenic_test_result  enterbac_fc_exp {
	eststo m_`v' :  xtivreg2_centered `v' treated_post post if panel_final ==1 [aweight=weight_panel], cluster(village_code) control(lu_*) fe i(source_id_num) level(90) 
}

* export table
esttab m_* using "$path_tables/change_in_ws_panel.tex", replace cell(b(star fmt(3)) se(par fmt(3)) ) label nogap number starlevels(* 0.1 ** 0.05 *** 0.01) keep(treated_post post) nomtitle mgroups("\specialcell{\;\;\;\; Arsenic \\ contamination \\ \;\;\;\; (WHO \\ \;\; threshold)}" "\specialcell{\;\;\;\;\;Arsenic \\ contamination \\ (Bangladeshi \\ \;\; threshold)}" "\specialcell{\;\;\;\; Arsenic \\ contamination \\ \;\;\;\;\; level}"  "\specialcell{\;\;\;\; Fecal \\ contamination \\ \;\;\;\;\;}", pattern(1 1 1 1) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) collabels(none) stats(r2 N, layout(@ @) labels( "R$^2$" "N") fmt(%9.2f %9.0f ))



//////////////////////////
////table S13.1 ///////
/////////////////////////

//Create interaction variables 

use "$data/temp/temp_analysis.dta", clear

* select final sample
keep if complete_panel == 1 

rename *arsenic* *as*
rename *wmean *wm

rename *field_test_result* *field_res*

rename *hh_enterbac_fc_exp* *hh_bac_exp*
rename *ws_enterbac_fc_exp* *ws_bac_exp*
rename *as_res_wm *as_field_res_wm

 
//Predict household contamination with source contamination and its interactions with treatment status

foreach contaminant in as_any as_50plus bac_exp {

	foreach time in b f d {
		if "`time'" == "b" {
			local prefix ""
		}
		if "`time'" == "f" {
			local prefix "fu_"
		}
		if "`time'" == "d" {
			local prefix "dif_"
		}
		
		capture drop cont cont_treated
		
		gen cont = `prefix'ws_`contaminant'_wm
		gen cont_treated = `prefix'ws_`contaminant'_wm * treated
	
		reg `prefix'hh_`contaminant' cont cont_treated treated [iweight = pweight_normal_final], vce(cluster village_code) 
		
		estimates store e_rb_`contaminant'_`time'
		
		if "`time'" == "b" {
			estadd local time "Baseline"
		}
		if "`time'" == "f" {
			estadd local time "Endline"
		}
		if "`time'" == "d" {
			estadd local time "Change"
		}
		
		if "`contaminant'" == "as_any" {
			estadd local contaminant "As > 10" 
		}
		
		if "`contaminant'" == "as_50plus" {
			estadd local contaminant "As > 50" 
		}	
		
		if "`contaminant'" == "bac_exp" {
			estadd local contaminant "Fecal" 
		}		
		
	}
	
}

label var cont "Source contamination"
label var cont_treated "Source contamination x treated"

esttab e_rb* using "$path_tables/reporting_bias.tex",  nodepvars nomtitles replace cell(b(star fmt(3)) se(par fmt(3)) ) label collabels(none) nogap number starlevels(* 0.1 ** 0.05 *** 0.01)  stats(N time contaminant, layout(@ @ @) labels("N" "Contaminant" "Time") fmt(%9.0f %9.0f %9.0f)) mgroups("Contamination in household drinking water", pattern(1) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) 
eststo clear

///////////////////////
//// table S14.1 //////
////////////////////////

use "$data/temp/temp_analysis.dta", clear

* select final sample
keep if complete_panel == 1 // final panel

global weight pweight_normal_final
global repetitions_rbi 500


foreach v of varlist  ds ds5 asrep asobs {

	* RBI
	*This code generates the p-values for inference over differences in means for each characteristic separately, by using reshuffled treatment status from resimulated lotteries
	*This matrix stores the differences in means between all treated groups and the control group under all of the reshuffled treatment status
	mat dif_`v'_t_rbi = J($repetitions_rbi,1,.)
	preserve
	keep dif_`v' control* treated* $weight lu_* village_code
	forvalues i = 1(1)$repetitions_rbi {
		quiet reg_centered dif_`v' treated_trvsco_`i' [aweight = $weight], cluster(village_code) control(lu_*)
		mat dif_`v'_t_rbi[`i',1] = _b[treated_trvsco_`i']
	}
	restore
	svmat dif_`v'_t_rbi
	*This regression recovers the true difference in means between all treated groups and the control group under the realized treatment status
	quiet reg_centered dif_`v' treated [aweight = $weight], cluster(village_code) control(lu_*)
	*The code below recovers the p-values implied by the RBI
	capture drop temp
	gen temp = (abs(_b[treated])<= abs(dif_`v'_t_rbi1))
	quiet summarize temp if dif_`v'_t_rbi1 != .
	global pvalue_rbi = r(mean)
	drop temp
	
	* non RBI
	eststo m_dif_`v' : reg_centered dif_`v' treated [aweight=pweight_normal_final] , cluster(village_code) control(lu_*)
	
	quiet summarize `v' if e(sample)==1
	global meanbase=r(mean)
	estadd scalar meanbase=$meanbase

	* calculate and save non-RBI pvalues
	quiet test treated 
	global pvalue = r(p)
	estadd scalar pvalue = $pvalue
	* save RBI pvalues is eststo
	estadd scalar pvalue_rbi = $pvalue_rbi
}
* export table
#delimit ;
esttab m_* using "$path_tables/policy_impact_health.tex", replace 
	cell(b(star fmt(3)) se(par fmt(3)) )
	label nogap number starlevels(* 0.1 ** 0.05 *** 0.01) 
	keep(treated _cons) 
	nomtitle 
	mgroups("\specialcell{ Diarrhea in \\ $\leq$ 12 y.o. \\ in last two weeks }" 
		"\specialcell{ Diarrhea in \\ $\leq$ 5 y.o. \\ in last two weeks  }" "\specialcell{ Reported \\ As poisoning }"  "\specialcell{ Observed \\  As poisoning }", 
		pattern(1 1 1 1) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) 
	collabels(none) 
	stats(pvalue pvalue_rbi meanbase r2 N, layout(@ @ @ @) labels("p-value (analytical)" "p-value (RBI)" "Mean at baseline" "R$^2$" "N") fmt( %9.2f %9.2f %9.2f %9.2f %9.0f ))
;
#delimit cr
eststo clear


////////////////////
//// table S15.1 ////
////////////////////
use "$data/temp/temp_analysis.dta", clear
* select final sample
keep if complete_panel == 1 // final panel

global weight pweight_normal_final
rename *_ws_enterbac_fc_exp_wmean *_ws_bac_exp_wmean

* interaction with baseline use of safe water
foreach v of varlist dif_ws_arsenic_any_wmean dif_ws_bac_exp_wmean{
	eststo m_`v' : reg `v' as_b_* lu_2-lu_11  [aweight=pweight_normal_final] , cluster(village_code) nocons
}
* export table
#delimit ;
esttab m_* using "$path_tables/policy_impact_ws_het_bas_saf.tex", replace 
	cell(b(star fmt(3)) se(par fmt(3)) )
	label nogap number starlevels(* 0.1 ** 0.05 *** 0.01) 
	keep(as_b_*treated) 
	nomtitle 
	mgroups("\specialcell{\;\;\;\; Arsenic \\ contamination \\ \;\;\;\; (WHO \\ \;\; threshold)}" "\specialcell{\;\;\;\;\;\; Fecal \\ contamination}", 
		pattern(1 1 1 1) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) 
	collabels(none) stats( r2 N, layout(@ @) labels( "R$^2$" "N") fmt( %9.2f %9.0f ))
;
#delimit cr
eststo clear


////////////////////
//// table S15.2 ////
////////////////////
use "$data/temp/temp_analysis.dta", clear
* select final sample
keep if complete_panel == 1 // final panel

rename *_ws_hh_distance_final_wmean *_ws_hh_dist_f_wmean

* interaction with baseline use of safe water
foreach v of varlist dif_ws_hh_dist_f_wmean dif_ws_dist_min_wmean dif_source_test_stored dif_water_storage_d {
	eststo m_`v' : reg_centered `v' as_b_*  [aweight=pweight_normal_final] , cluster(village_code) control(lu_*) nocons
}
* export table
#delimit ;
esttab m_* using "$path_tables/policy_impact_practice_het_bas_saf.tex", replace 
	cell(b(star fmt(3)) se(par fmt(3)) )
	label nogap number starlevels(* 0.1 ** 0.05 *** 0.01) 
	keep(as_b_*treated) 
	nomtitle 
	mgroups("\specialcell{\;Distance \\ HH-WS (m)}" "\specialcell{\;\;\;Distance \\ HH-WS (min)}" "\specialcell{Observed \\ \; storage}"
		"\specialcell{Reported \\ \; storage}", 
		pattern(1 1 1 1) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) 
	collabels(none) stats( r2 N, layout(@ @) labels( "R$^2$" "N") fmt( %9.2f %9.0f ))
;
#delimit cr
eststo clear

////////////////////
//// table S15.3 ////
////////////////////
use "$data/temp/temp_analysis.dta", clear
* select final sample
keep if complete_panel == 1 // final panel

* interaction with baseline use of safe water
foreach v of varlist dif_hh_arsenic_any dif_hh_enterbac_fc_exp {
	eststo m_`v' : reg `v' as_b_* lu_2-lu_11  [aweight=pweight_normal_final] , cluster(village_code) nocons
}
* export table
#delimit ;
esttab m_* using "$path_tables/policy_impact_hh_het_bas_saf.tex", replace 
	cell(b(star fmt(3)) se(par fmt(3)) )
	label nogap number starlevels(* 0.1 ** 0.05 *** 0.01) 
	keep(as_b_*treated) 
	nomtitle 
	mgroups("\specialcell{\;\;\;\; Arsenic \\ contamination \\ \;\;\;\; (WHO \\ \;\; threshold)}" "\specialcell{\;\;\;\;\;\; Fecal \\ contamination}", 
		pattern(1 1 1 1) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) 
	collabels(none) stats( r2 N, layout(@ @) labels( "R$^2$" "N") fmt( %9.2f %9.0f ))
;
#delimit cr
eststo clear


////////////////////
//// figure S15.1 ////
////////////////////

use "$data/temp/temp_analysis.dta", clear

* select final sample
keep if complete_panel == 1 // final panel

rename dif_ws_enterbac_fc_exp_wmean dif_ws_bac_exp_wmean

global weight pweight_normal_final

set scheme s1mono

foreach v of varlist dif_ws_arsenic_any_wmean dif_ws_bac_exp_wmean{
	#delimit ;
	twoway (lpolyci `v' final_distance_hh_constr_mins if final_distance_hh_constr_mins != .  & final_distance_hh_constr_mins) ///
	(kdensity final_distance_hh_constr_mins, yaxis(2) lp(dash)),  name(`v'_w_dist, replace) ytitle("Change in source contamination", axis(1)) ///
	ytitle("Density", axis(2)) xtitle("Distance from constructed well" "(Successful treatment units only)") ///
	legend(order(1 "95% CI" 2 "Non-parametric relationship" 3 "Empirical density"))
	;
	#delimit cr
	graph export "$path_graphs/`v'_desc_dist.pdf", replace
}	

////////////////////
//// figure S15.2 ////
////////////////////

use "$data/temp/temp_analysis.dta", clear

* select final sample
keep if complete_panel == 1 // final panel

rename dif_ws_hh_distance_final_wmean dif_ws_hh_dist_f_wmean

foreach v of varlist dif_ws_hh_dist_f_wmean dif_ws_dist_min_wmean dif_source_test_stored dif_water_storage_d {
	#delimit ;
	twoway (lpolyci `v' final_distance_hh_constr_mins if final_distance_hh_constr_mins != .  & final_distance_hh_constr_mins) 
	(kdensity final_distance_hh_constr_mins, yaxis(2) lp(dash) ), name(`v'_w_dist, replace) ytitle("Change in outcome", axis(1)) 
	ytitle("Density", axis(2)) xtitle("Distance from constructed well" "(Successful treatment units only)")
	legend(order(1 "95% CI" 2 "Non-parametric relationship" 3 "Empirical density"))
	;
	#delimit cr
	graph export "$path_graphs/`v'_desc_dist.pdf", replace
}	


////////////////////
//// figure S15.3 ////
////////////////////
use "$data/temp/temp_analysis.dta", clear

* select final sample
keep if complete_panel == 1 // final panel

foreach v of varlist dif_hh_arsenic_any dif_hh_enterbac_fc_exp {
	#delimit ;
	twoway (lpolyci `v' final_distance_hh_constr_mins if final_distance_hh_constr_mins != .  & final_distance_hh_constr_mins) 
	(kdensity final_distance_hh_constr_mins, yaxis(2)), name(`v'_w_dist, replace) ytitle("Change in household contamination", axis(1))
	ytitle("Density", axis(2)) xtitle("Distance from constructed well" "(Successful treatment units only)") 
	legend(order(1 "95% CI" 2 "Non-parametric relationship" 3 "Empirical density"))
	;
	#delimit cr
	graph export "$path_graphs/`v'_desc_dist.pdf", replace
}	


////////////////////
//// figure S15.4 ////
////////////////////
use "$data/temp/temp_analysis.dta", clear

* select final sample
keep if complete_panel == 1 // final panel

rename dif_ws_enterbac_fc_exp_wmean dif_ws_ent_wm
rename dif_ws_arsenic_any_wmean dif_ws_ars_wm

foreach v of varlist dif_ws_ent_wm dif_ws_ars_wm {
	foreach anch in algo desk {
		local a = substr("`anch'",1,1)
		eststo m_`v'_`a' : reg `v' hh_d_`anch'* [aweight=pweight_normal_final] , cluster(village_code) nocons
	}
}

foreach v of varlist dif_ws_ent_wm dif_ws_ars_wm {
	coefplot m_`v'_d m_`v'_a, keep(hh_d_*1_t hh_d_*2_t hh_d_*3_t hh_d_*4_t hh_d_*5_t hh_d_*6_t) name(`v'_het_dist, replace)  rename(hh_d_desk([0-9]+)_t$ = \1 hh_d_algo([0-9]+)_t$ = \1, regex) at(_coef) xtitle("Minutes walking distance from predicted location") ytitle("Change in source water contamination") legend(order(2 "Prediction by inspection" 4 "Prediction by algorithm")) levels(90)
	graph export "$path_graphs/`v'_het_dist.pdf", replace
}
eststo clear


/////////////////////
//// figure S15.5 ////
/////////////////////
use "$data/temp/temp_analysis.dta", clear

* select final sample
keep if complete_panel == 1 // final panel
rename dif_ws_hh_distance_final_wmean dif_ws_hh_dist_f_wmean

foreach v of varlist dif_ws_hh_dist_f_wmean dif_ws_dist_min_wmean dif_source_test_stored dif_water_storage_d {
	foreach anch in algo desk {
		local a = substr("`anch'",1,1)
		eststo m_`v'_`a' : reg `v' hh_d_`anch'* [aweight=pweight_normal_final] , cluster(village_code) nocons
	}
}
foreach v of varlist dif_ws_hh_dist_f_wmean dif_ws_dist_min_wmean dif_source_test_stored dif_water_storage_d {
	coefplot m_`v'_d m_`v'_a, keep(hh_d_*1_t hh_d_*2_t hh_d_*3_t hh_d_*4_t hh_d_*5_t hh_d_*6_t) name(`v'_het_dist, replace)  rename(hh_d_desk([0-9]+)_t$ = \1 hh_d_algo([0-9]+)_t$ = \1, regex) at(_coef) /// 
	xtitle("Minutes walking distance from predicted location") ytitle("Change in outcome") ///
	legend(order(2 "Prediction by inspection" 4 "Prediction by algorithm")) levels(90)
	graph export "$path_graphs/`v'_het_dist.pdf", replace
}
eststo clear


////////////////////
//// figure S15.6 ////
////////////////////
use "$data/temp/temp_analysis.dta", clear

* select final sample
keep if complete_panel == 1 // final panel
rename dif_ws_hh_distance_final_wmean dif_ws_hh_dist_f_wmean


foreach v of varlist dif_hh_arsenic_any dif_hh_enterbac_fc_exp {
	if "`v'" == "dif_hh_arsenic_any" {
		local v_short dif_hh_as_any
	}
	if "`v'" == "dif_hh_enterbac_fc_exp" {
		local v_short dif_hh_bac_any
	}
	foreach anch in algo desk {
		eststo m_`v_short'_`anch' : reg `v' hh_d_`anch'* lu_2-lu_11  [aweight=pweight_normal_final] , cluster(village_code) nocons
	}
}
foreach v of varlist dif_hh_arsenic_any dif_hh_enterbac_fc_exp {
	if "`v'" == "dif_hh_arsenic_any" {
		local v_short dif_hh_as_any
	}
	if "`v'" == "dif_hh_enterbac_fc_exp" {
		local v_short dif_hh_bac_any
	}
	coefplot m_`v_short'_desk m_`v_short'_algo, keep(hh_d_*1_t hh_d_*2_t hh_d_*3_t hh_d_*4_t hh_d_*5_t hh_d_*6_t) name(`v'_het_dist, replace) ///
	rename(hh_d_desk([0-9]+)_t$ = \1 hh_d_algo([0-9]+)_t$ = \1, regex) at(_coef) xtitle("Minutes walking distance from predicted location") ///
	ytitle("Change in household water contamination") legend(order(2 "Prediction by inspection" 4 "Prediction by algorithm")) levels(90)
	graph export "$path_graphs/`v'_het_dist.pdf", replace
}
eststo clear

////////////////////
//// table S15.4 ////
////////////////////
use "$data/temp/temp_analysis.dta", clear

* select final sample
keep if complete_panel == 1 // final panel
rename dif_ws_hh_distance_final_wmean dif_ws_hh_dist_f_wmean
rename dif_ws_enterbac_fc_exp_wmean  dif_ws_bac_exp_wmean

foreach v of varlist dif_ws_arsenic_any_wmean dif_ws_bac_exp_wmean{
	eststo m_`v' : reg `v' inc1-inc5 inc*_t lu_2-lu_11 [aweight=pweight_normal_final] , cluster(village_code) nocons
}
#delimit ;
esttab m_* using "$path_tables/policy_impact_ws_het_bas_inc.tex", replace 
	cell(b(star fmt(3)) se(par fmt(3)) )
	label nogap number starlevels(* 0.1 ** 0.05 *** 0.01) 
	keep(inc*_t) order(inc5_t inc3_t inc1_t inc2_t inc4_t)
	nomtitle 
	mgroups("\specialcell{\;\;\;\;\; Arsenic \\ contamination \\ \;\;\;\;\;(WHO)}" "\specialcell{\;\;\;\;\;\; Fecal \\ contamination}", 
		pattern(1 1 1 1) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) 
	collabels(none) stats( r2 N, layout(@ @) labels( "R2" "Obs") fmt( %9.2f %9.0f ))
;
#delimit cr
eststo clear


////////////////////
//// table S15.5 ////
////////////////////
use "$data/temp/temp_analysis.dta", clear

* select final sample
keep if complete_panel == 1 // final panel
rename dif_ws_hh_distance_final_wmean dif_ws_hh_dist_f_wmean


foreach v of varlist dif_ws_hh_dist_f_wmean dif_ws_dist_min_wmean dif_source_test_stored dif_water_storage_d {
	eststo m_`v' : reg `v' inc1-inc5 inc*_t lu_2-lu_11 [aweight=pweight_normal_final] , cluster(village_code) nocons
}
#delimit ;
esttab m_* using "$path_tables/policy_impact_practice_het_bas_inc.tex", replace 
	cell(b(star fmt(3)) se(par fmt(3)) )
	label nogap number starlevels(* 0.1 ** 0.05 *** 0.01) 
	keep(inc*_t) order(inc5_t inc3_t inc1_t inc2_t inc4_t)
	nomtitle 
	mgroups("\specialcell{\;Distance \\ HH-WS (m)}" "\specialcell{\;\;\;Distance \\ HH-WS (min)}" "\specialcell{Observed \\ \; storage}"
		"\specialcell{Reported \\ \; storage}", 
		pattern(1 1 1 1) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) 
	collabels(none) stats( r2 N, layout(@ @) labels( "R2" "Obs") fmt( %9.2f %9.0f ))
;
#delimit cr
eststo clear


////////////////////
//// table S15.6 ////
////////////////////
use "$data/temp/temp_analysis.dta", clear

* select final sample
keep if complete_panel == 1 // final panel
rename dif_ws_hh_distance_final_wmean dif_ws_hh_dist_f_wmean


foreach v of varlist dif_hh_arsenic_any dif_hh_enterbac_fc_exp {
	eststo m_`v' : reg `v' inc1-inc5 inc*_t lu_2-lu_11  [aweight=pweight_normal_final] , cluster(village_code)  nocons
}
#delimit ;
esttab m_* using "$path_tables/policy_impact_hh_het_bas_inc.tex", replace 
	cell(b(star fmt(3)) se(par fmt(3)) )
	label nogap number starlevels(* 0.1 ** 0.05 *** 0.01) 
	keep(inc*_t) order(inc5_t inc3_t inc1_t inc2_t inc4_t)
	nomtitle 
	mgroups("\specialcell{\;\;\;\;\; Arsenic \\ contamination \\ \;\;\;\;\;(WHO)}" "\specialcell{\;\;\;\;\;\; Fecal \\ contamination}", 
		pattern(1 1 1 1) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) 
	collabels(none) stats( r2 N, layout(@ @) labels( "R2" "Obs") fmt( %9.2f %9.0f ))
;
#delimit cr
eststo clear



///////////////////////////////////
//// table S16.1, S16.2 and S16.3 ////
///////////////////////////////////
clear 

use "$data/temp/temp_analysis.dta", clear

* select final sample
keep if complete_panel == 1 // final panel

rename final_distance_hh_constr_mins final_dist_hh_constr_mins
rename final_distance_hh_accepted_mins  dist_hh_accepted_mins
rename *ws_enterbac_fc_exp_wmean *ws_bac_exp_wmean
rename log_distance_hh_*anchor log_dist_hh_*anchor
rename ws_arsenic_50plus_wmean ws_arsenic_50_wmean
global weight pweight_normal_final

gen arsenic_cat = 0 if hh_arsenic_any == 0
replace arsenic_cat = 1 if  hh_arsenic_any == 1 & hh_arsenic_50plus == 0
replace arsenic_cat = 2 if  hh_arsenic_50plus == 1


foreach instrument in desk algo {

	gen predicted_switch_`instrument' = .

	foreach endog in FC dist_m dist_r {
	
		foreach var in treated control cash labour waiver {
		
			gen pred_`endog'_diff_`var'_`instrument' = .
		
		}
	}
}


forvalues i = 1(1)6 {
	forvalues j = 0(1)2 {
	
		quiet summarize water_per_day_share_cclpg if final_dist_hh_constr_mins > `i' -1 & dist_hh_accepted_mins <= `i' & arsenic_cat == `j'

		local switch_rate = r(mean) 

		foreach instrument in desk algo {

			replace pred_FC_diff_control_`instrument' = `switch_rate' * (0.36 - ws_bac_exp_wmean) if dist_hh_`instrument'anchor_mins > `i' -1 & dist_hh_`instrument'anchor_mins  <= `i' & arsenic_cat == `j'
		
			foreach dist in dist_m dist_r {
			
				if "`dist'" == "dist_m" {
				
					local dist_measure = ws_hh_dist_final_wmean_mins
				
				}
				
				if "`dist'" == "dist_r" {
				
					local dist_measure = dif_ws_dist_min_wmean
				
				}
			
		
				replace pred_`dist'_diff_control_`instrument' = `switch_rate' * (dist_hh_`instrument'anchor_mins - `dist_measure') if dist_hh_`instrument'anchor_mins > `i' -1 & dist_hh_`instrument'anchor_mins  <= `i' & arsenic_cat == `j'
			
			}
	
			replace predicted_switch_`instrument' = `switch_rate' if dist_hh_`instrument'anchor_mins > `i' -1 & dist_hh_`instrument'anchor_mins  <= `i' & arsenic_cat == `j'

		}
	}
}

foreach instrument in desk algo {

	replace pred_FC_diff_control_`instrument' = 0 if pred_FC_diff_control_`instrument' == . 
	replace predicted_switch_`instrument' = 0 if predicted_switch_`instrument' == . 

	foreach dist in dist_m dist_r {
		replace pred_`dist'_diff_control_`instrument' = 0 if pred_`dist'_diff_control_`instrument' == . 
		
	}
	
	foreach treatment in treated cash labour waiver {
	
		replace pred_FC_diff_`treatment'_`instrument' =  pred_FC_diff_control_`instrument' * `treatment'
		
		foreach dist in dist_m dist_r {

			replace pred_`dist'_diff_`treatment'_`instrument' =  pred_`dist'_diff_control_`instrument' * `treatment'
		
		}
		
		gen predicted_switch_`treatment'_`instrument' = predicted_switch_`instrument' * `treatment'
	
	}

	
}

//Create other controls for IV

gen fc_control = 0 - ws_bac_exp_wmean
gen fc_control_treated = fc_control  * treated 

foreach instrument in desk algo {

	gen dist_control_`instrument' =  dist_hh_`instrument'anchor_mins - ws_hh_dist_final_wmean_mins
	gen dist_control_`instrument'_treated = dist_control_`instrument' * treated
	
}	

//Gen shortnamed variables for regression

gen diff_ws_bac = dif_ws_bac_exp_wmean 
label var diff_ws_bac "Source fecal contamination"

gen diff_dist_m = dif_ws_hh_dist_final_wmean_mins 
label var diff_dist_m "Travel time in minutes, measured"

gen diff_dist_r = dif_ws_dist_min_wmean 
label var diff_dist_r "Travel time in minutes, reported"

gen diff_stored = dif_source_test_stored
label var diff_stored "Storage, observed"


//Diff in diff 


foreach dist in dist_m dist_r {

	//With storage controls 

	reg dif_hh_enterbac_fc_exp diff_ws_bac diff_`dist' diff_stored [aweight=pweight_normal_final],  cluster(village_code)
	
	estimates store e_did_sto_`dist'
	estadd local fe "No"


	reghdfe dif_hh_enterbac_fc_exp diff_ws_bac diff_`dist' diff_stored [aweight=pweight_normal_final],  cluster(village_code) absorb(village_code)
	
	estimates store e_did_sto_`dist'_fe
	estadd local fe "Yes"

	//Without storage controls 

	reg dif_hh_enterbac_fc_exp diff_ws_bac diff_`dist' [aweight=pweight_normal_final],  cluster(village_code)
	
	estimates store e_did_`dist'
	estadd local fe "No"


	reghdfe dif_hh_enterbac_fc_exp diff_ws_bac diff_`dist' [aweight=pweight_normal_final],  cluster(village_code) absorb(village_code)
	
	estimates store e_did_`dist'_fe
	estadd local fe "Yes"

}


foreach instrument in desk algo {
	label var dist_hh_`instrument'anchor_mins  "Distance to nearest predicted location"
	//Zeroth stage regressions for distance 
	reg final_dist_hh_constr_mins dist_hh_`instrument'anchor_mins [aweight=pweight_normal_final], cluster(village_code)
	estimates store e_dist_`instrument'
	reghdfe final_dist_hh_constr_mins dist_hh_`instrument'anchor_mins [aweight=pweight_normal_final], cluster(village_code) absorb(village_code)
	estimates store e_dist_`instrument'_fe
	//Zeroth stage regressions	
	label var predicted_switch_`instrument' "Predicted switch"
	label var predicted_switch_treated_`instrument' "Predicted switch x treated"
	reg water_per_day_share_cclpg predicted_switch_`instrument' if treated == 1 [aweight=pweight_normal_final], cluster(village_code)
	estimates store e_use_`instrument'
	reghdfe water_per_day_share_cclpg predicted_switch_`instrument' if treated == 1 [aweight=pweight_normal_final], cluster(village_code) absorb(village_code)
	estimates store e_use_`instrument'_fe
	//Zeroth stage regressions, by treatment arm	
	label var predicted_switch_cash_`instrument' "Predicted switch x cash"
	label var predicted_switch_labour_`instrument' "Predicted switch x labour"
	label var predicted_switch_waiver_`instrument' "Predicted switch x waiver"
	reg water_per_day_share_cclpg predicted_switch_cash_`instrument' predicted_switch_labour_`instrument' predicted_switch_waiver_`instrument' cash labour waiver if treated == 1  [aweight=pweight_normal_final], nocons cluster(village_code)
	estimates store e_use_cont_`instrument'
	reghdfe water_per_day_share_cclpg predicted_switch_cash_`instrument' predicted_switch_labour_`instrument' predicted_switch_waiver_`instrument' if treated == 1 [aweight=pweight_normal_final],  cluster(village_code) absorb(village_code)
	estimates store e_use_cont_`instrument'_fe
	if "`instrument'" == "desk" {
		estadd local instr "Desk": e_dist_`instrument'* e_use*`instrument'*
	}
	if "`instrument'" == "algo" {
		estadd local instr "Algorithm": e_dist_`instrument'* e_use*`instrument'*
	}
	estadd local fe "No", replace: e_dist* e_use*	
	estadd local fe "Yes", replace: e*fe
}
// table S16.1
#delimit ;
esttab e_dist* using "$path_tables/zeroth_dist.tex", replace 
	cell(b(star fmt(3)) se(par fmt(3)) )
	label nogap number starlevels(* 0.1 ** 0.05 *** 0.01) 
	keep(dist* _cons) 
	nomtitle 
	rename(dist_hh_algoanchor_mins "dist_hh_deskanchor_mins")
	mgroups("\specialcell{\;\;\;\;\; Distance to nearest constructed source \;\;\;\;\;}" , 
		pattern(1 0 0 0) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) 
	collabels(none) stats(instr fe r2 N, layout(@ @) labels("Predicted location" "Treatment unit fixed effects" "R2" "Obs") fmt(%9.0g %9.0g %9.2f %9.0f ));
;
#delimit cr
// table S16.2
#delimit ;
esttab e_use_desk* e_use_algo* using "$path_tables/zeroth_use.tex", replace 
	cell(b(star fmt(3)) se(par fmt(3)) )
	label nogap number starlevels(* 0.1 ** 0.05 *** 0.01) 
	keep(predicted_switch* _cons) 
	nomtitle 
	rename(predicted_switch_algo "predicted_switch_desk")
	mgroups("\specialcell{\;\;\;\;\; Share of water from project source \;\;\;\;\;}" , 
		pattern(1 0 0 0) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) 
	collabels(none) stats(instr fe r2 N, layout(@ @) labels("Predicted location" "Treatment unit fixed effects" "R2" "Obs") fmt(%9.0g %9.0g %9.2f %9.0f ));
;
#delimit cr
// table S16.3
#delimit ;
esttab e_use_cont* using "$path_tables/zeroth_use_cont.tex", replace 
	cell(b(star fmt(3)) se(par fmt(3)) )
	label nogap number starlevels(* 0.1 ** 0.05 *** 0.01) 
	keep(predicted_switch* cash labour waiver _cons) 
	nomtitle 
	rename(predicted_switch_cash_algo "predicted_switch_cash_desk" predicted_switch_labour_algo "predicted_switch_labour_desk" predicted_switch_waiver_algo "predicted_switch_waiver_desk")
	mgroups("\specialcell{\;\;\;\;\; Share of water from project source \;\;\;\;\;}" , 
		pattern(1 0 0 0) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) 
	collabels(none) stats(instr fe r2 N, layout(@ @) labels("Predicted location" "Treatment unit fixed effects" "R2" "Obs") fmt(%9.0g %9.0g %9.2f %9.0f ));
;
#delimit cr
eststo clear

////////////////////
//// figure S16.1 ////
////////////////////

twoway lpolyci water_per_day_share_cclpg final_dist_hh_constr_mins  if treated == 1, xtitle("Walking dist from installed source in minutes") ///
	ytitle("Share of household water from project source") legend(off)
graph export "$path_graphs/takeup_with_dist.pdf", replace




	
///////////////////////////
//// table S16.4 and S16.6 ////
///////////////////////////

global treatments control cash labour waiver

rename ws_bac_exp_wmean ws_fc_exp_wmean
global vars2 hh_arsenic_any hh_arsenic_50plus hh_enterbac_fc_exp ws_arsenic_any_wmean ws_arsenic_50_wmean ws_fc_exp_wmean ws_treat_d_wmean /// 	
	source_test_stored collect_water_mins collect_per_day_qty
	
foreach t of newlist algo desk {
	foreach var of global vars { 
		global lab_`var' : var label `var'
		sum `var'
		global N_`var' = r(N)
		*
		xi: reg `var' cash labour waiver log_dist_hh_`t'anchor ldist_`t'anchor_cash ldist_`t'anchor_labour ldist_`t'anchor_waiver i.union_name ///
			[aweight=pweight_b] , cluster(village_code)
		foreach c of varlist cash labour waiver {
			gen `var'_c_`c'= trim("`: display %9.3g _b[ldist_`t'anchor_`c']'")
			global `var'_c_`c'= trim("`: display %9.3g _b[ldist_`t'anchor_`c']'")
			tostring `var'_c_`c', replace
			global `var'_sd_`c' = trim("`: display %9.3f _se[ldist_`t'anchor_`c']'")
			global pvalue = (2 * ttail(e(df_r), abs(_b[ldist_`t'anchor_`c']/_se[ldist_`t'anchor_`c']))) 
			if $pvalue < 0.01 {
				replace `var'_c_`c' = "`: display ${`var'_c_`c'}'" + "***"
			}
			if $pvalue < 0.05 & $pvalue>=0.01 {
				replace `var'_c_`c' = "`: display ${`var'_c_`c'}'" + "**"
			}
			if $pvalue < 0.1 & $pvalue>=0.05 {
				replace `var'_c_`c' = "`: display ${`var'_c_`c'}'" + "*"
			}
			dis $pvalue
			tab `var'_c_`c'
			global `var'_c_`c' = `var'_c_`c'
			drop `var'_c_`c'
		}
		*
		xi: reg `var' treated log_dist_hh_`t'anchor ldist_`t'anchor_treated i.union_name [aweight=pweight_b] , cluster(village_code)
		gen `var'_c_t = trim("`: display %9.3g _b[ldist_`t'anchor_treated]'")
		tostring `var'_c_`c', replace
		global `var'_c_t = trim("`: display %9.3g _b[ldist_`t'anchor_treated]'")
		global `var'_sd_t = trim("`: display %9.3f _se[ldist_`t'anchor_treated]'") 
		global pvalue = (2 * ttail(e(df_r), abs(_b[ldist_`t'anchor_treated]/_se[ldist_`t'anchor_treated]))) 
		if $pvalue < 0.01 {
			replace `var'_c_t = "`: display ${`var'_c_t}'" + "***"
		}
		if $pvalue < 0.05 & $pvalue>=0.01 {
			replace `var'_c_t = "`: display ${`var'_c_t}'" + "**"
		}
		if $pvalue < 0.1 & $pvalue>=0.05 {
			replace `var'_c_t = "`: display ${`var'_c_t}'" + "*"
		}
		global `var'_c_t = `var'_c_t
		drop `var'_c_t
	}
	* test for joint significance
	foreach j of varlist cash labour waiver treated {
		global interactions_`c' ""
		foreach v of global vars {
			capture gen `v'_`j' = `v' * `j'
			global interactions "${interactions_`c'} `v'_`j'" 
		}
	}
	foreach c of varlist cash labour waiver treated {
		xi: reg log_dist_hh_`t'anchor $vars $interactions_`c' `c'  i.union_name [aweight=pweight_b] if `c'==1 | control==1, cluster(village_code)
		test $interactions_`c' `c'
		global joint_pvalue_`c' = trim("`: display %9.3f r(p)'")
	}
	* export table 
	texdoc do "$dofiles/texdoc/balance_socioeconomic_`t'anchor.do"
}

*creates table "$dofiles/socioeconomic_balance_`t'anchor.do"
///////////////////////////
//// table S16.5 and S16.7 ////
///////////////////////////
global vars2 hh_arsenic_any hh_arsenic_50plus hh_enterbac_fc_exp ws_arsenic_any_wmean ws_arsenic_50_wmean ws_fc_exp_wmean ws_treat_d_wmean /// 	
	source_test_stored collect_water_mins collect_per_day_qty new_source_soc_wtp
	
label var hh_arsenic_any "Arsenic contamination (WHO threshold) (HH test)"
label var hh_arsenic_50plus "Arsenic contamination (Bangladeshi threshold) (HH test)"
label var hh_enterbac_fc_exp "Fecal contamination (HH test)"
label var ws_arsenic_any_wmean "WS Arsenic contamination (WHO threshold)"
label var ws_arsenic_50_wmean "WS Arsenic contamination (Bangladeshi threshold)"
label var ws_fc_exp_wmean "WS Fecal contamination"
label var ws_treat_d_wmean "The water is treated to make it safe for drinking"
label var source_test_stored "Household observed to store drinking water"
label var new_source_soc_wtp "WTP for a new WS in a socially optimal location"


foreach t of newlist algo desk {
	foreach var of global vars2 { 
		global lab_`var' : var label `var'
		sum `var'
		global N_`var' = r(N)
		*
		xi: reg `var' cash labour waiver log_dist_hh_`t'anchor ldist_`t'anchor_cash ldist_`t'anchor_labour ldist_`t'anchor_waiver i.union_name ///
			[aweight=pweight_b] , cluster(village_code)
		foreach c of varlist cash labour waiver {
			gen `var'_c_`c'= trim("`: display %9.3g _b[ldist_`t'anchor_`c']'")
			tostring `var'_c_`c', replace
			global `var'_c_`c'= trim("`: display %9.3g _b[ldist_`t'anchor_`c']'")
			global `var'_sd_`c' = trim("`: display %9.3f _se[ldist_`t'anchor_`c']'")
			global pvalue = (2 * ttail(e(df_r), abs(_b[ldist_`t'anchor_`c']/_se[ldist_`t'anchor_`c']))) 
			if $pvalue < 0.01 {
				replace `var'_c_`c' = "`: display ${`var'_c_`c'}'" + "***"
			}
			if $pvalue < 0.05 & $pvalue>=0.01 {
				replace `var'_c_`c' = "`: display ${`var'_c_`c'}'" + "**"
			}
			if $pvalue < 0.1 & $pvalue>=0.05 {
				replace `var'_c_`c' = "`: display ${`var'_c_`c'}'" + "*"
			}
			global `var'_c_`c' = `var'_c_`c'
			drop `var'_c_`c'
		}
		*
		xi: reg `var' treated log_dist_hh_`t'anchor ldist_`t'anchor_treated i.union_name [aweight=pweight_b] , cluster(village_code)
		gen `var'_c_t = trim("`: display %9.3g _b[ldist_`t'anchor_treated]'")
		tostring `var'_c_`c', replace
		global `var'_c_t = trim("`: display %9.3g _b[ldist_`t'anchor_treated]'")
		global `var'_sd_t = trim("`: display %9.3f _se[ldist_`t'anchor_treated]'") 
		global pvalue = (2 * ttail(e(df_r), abs(_b[ldist_`t'anchor_treated]/_se[ldist_`t'anchor_treated]))) 
		if $pvalue < 0.01 {
			replace `var'_c_t = "`: display ${`var'_c_t}'" + "***"
		}
		if $pvalue < 0.05 & $pvalue>=0.01 {
			replace `var'_c_t = "`: display ${`var'_c_t}'" + "**"
		}
		if $pvalue < 0.1 & $pvalue>=0.05 {
			replace `var'_c_t = "`: display ${`var'_c_t}'" + "*"
		}
		global `var'_c_t = `var'_c_t
		drop `var'_c_t
	}
	* test for joint significance
	foreach j of varlist cash labour waiver treated {
		global interactions_`c' ""
		foreach v of global vars2 {
			capture gen `v'_`j' = `v' * `j'
			global interactions "${interactions_`c'} `v'_`j'" 
		}
	}
	foreach c of varlist cash labour waiver treated {
		xi: reg log_dist_hh_`t'anchor $vars2 $interactions_`c' `c'  i.union_name [aweight=pweight_b] if `c'==1 | control==1, cluster(village_code)
		test $interactions_`c' `c'
		global joint_pvalue_`c' = trim("`: display %9.3f r(p)'")
	}
	* export table 
	texdoc do "$dofiles/texdoc/balance_water_`t'anchor.do"
}
**creates water_balance_deskanchor.tex and water_balance_algoanchor.tex



////////////////////////////////////
//// tables S16.8, S16.9, S16.10 and S16.11 ////
////////////////////////////////////

* calculate predicted values
foreach instrument in desk algo {
	reg diff_ws_bac pred_FC_diff_cash_`instrument' pred_FC_diff_labour_`instrument' pred_FC_diff_waiver_`instrument' [aweight=pweight_normal_final],  cluster(village_code) 
	capture drop pred_FC_diff_treated_d_`instrument'
	predict pred_FC_diff_treated_d_`instrument', xb
	foreach dist in dist_m dist_r {
		reg diff_`dist' pred_`dist'_diff_cash_`instrument' pred_`dist'_diff_labour_`instrument' pred_`dist'_diff_waiver_`instrument' [aweight=pweight_normal_final],  cluster(village_code) 	
		capture drop pred_`dist'_diff_treated_d_`instrument'
		predict pred_`dist'_diff_treated_d_`instrument', xb		
	}
}

//First stage regressions
foreach instrument in desk algo {
	label var pred_FC_diff_treated_d_`instrument' "Predicted change source fecal contamination x treated"
	label var pred_FC_diff_control_`instrument' "Predicted change source fecal contamination"	
	foreach endog in diff_ws_bac diff_dist_m diff_dist_r {
		foreach dist in dist_m dist_r {		
			label var pred_`dist'_diff_treated_d_`instrument' "Predicted change hh-ws distance x treated"
			label var pred_`dist'_diff_control_`instrument' "Predicted change hh-ws distance"
			local shortdist = substr("`dist'",6,1)
			reg `endog' pred_FC_diff_treated_d_`instrument' pred_FC_diff_control_`instrument' pred_`dist'_diff_treated_d_`instrument' pred_`dist'_diff_control_`instrument' [aweight=pweight_normal_final],  cluster(village_code) 
			estimates store e_1st_`endog'_`shortdist'_`instrument'	
			reghdfe `endog' pred_FC_diff_treated_d_`instrument' pred_FC_diff_control_`instrument' pred_`dist'_diff_treated_d_`instrument' pred_`dist'_diff_control_`instrument' [aweight=pweight_normal_final],  cluster(village_code) absorb(village_code)
			estimates store e_1st_`endog'_`shortdist'_`instrument'_fe
		}
	}
}			
estadd local instr "Desk": e_1st*desk*
estadd local instr "Algorithm": e_1st*algo*
estadd local dist_measure "Reported": e_1st*dist_r*
estadd local dist_measure "Measured": e_1st*dist_m*	
estadd local fe "No", replace: e_1st*
estadd local fe "Yes", replace: e_1st*fe
// table P8
#delimit ;
esttab e_1st_diff_ws_bac_r* using "$path_tables/first_diff_ws_bac_r.tex", replace 
	cell(b(star fmt(3)) se(par fmt(3)) )
	label nogap number starlevels(* 0.1 ** 0.05 *** 0.01) 
	keep(pred* _cons) 
	nomtitle 
	wrap
	rename(pred_FC_diff_treated_d_algo "pred_FC_diff_treated_d_desk" pred_FC_diff_control_algo "pred_FC_diff_control_desk" pred_dist_r_diff_treated_d_algo "pred_dist_r_diff_treated_d_desk" pred_dist_r_diff_control_algo "pred_dist_r_diff_control_desk")
	mgroups("\specialcell{\;\;\;\;\; Change in source fecal contamination \;\;\;\;\;}" , 
		pattern(1 0 0 0) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) 
	collabels(none) stats(instr fe r2 N, layout(@ @) labels("Predicted location" "Treatment unit fixed effects" "R2" "Obs") fmt(%9.0g %9.0g %9.2f %9.0f ));
;
#delimit cr
// table P9
#delimit ;
esttab e_1st_diff_ws_bac_m* using "$path_tables/first_diff_ws_bac_m.tex", replace 
	cell(b(star fmt(3)) se(par fmt(3)) )
	label nogap number starlevels(* 0.1 ** 0.05 *** 0.01) 
	keep(pred* _cons) 
	nomtitle 
	wrap
	rename(pred_FC_diff_treated_d_algo "pred_FC_diff_treated_d_desk" pred_FC_diff_control_algo "pred_FC_diff_control_desk" pred_dist_m_diff_treated_d_algo "pred_dist_m_diff_treated_d_desk" pred_dist_m_diff_control_algo "pred_dist_m_diff_control_desk")
	mgroups("\specialcell{\;\;\;\;\; Change in source fecal contamination \;\;\;\;\;}" , 
		pattern(1 0 0 0) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) 
	collabels(none) stats(instr fe r2 N, layout(@ @) labels("Predicted location" "Treatment unit fixed effects" "R2" "Obs") fmt(%9.0g %9.0g %9.2f %9.0f ));
;
#delimit cr
// table P10
#delimit ;
esttab e_1st_diff_dist_r_r* using "$path_tables/first_diff_dist_r.tex", replace 
	cell(b(star fmt(3)) se(par fmt(3)) )
	label nogap number starlevels(* 0.1 ** 0.05 *** 0.01) 
	keep(pred* _cons) 
	nomtitle 
	wrap
	rename(pred_FC_diff_treated_d_algo "pred_FC_diff_treated_d_desk" pred_FC_diff_control_algo "pred_FC_diff_control_desk" pred_dist_r_diff_treated_d_algo "pred_dist_r_diff_treated_d_desk" pred_dist_r_diff_control_algo "pred_dist_r_diff_control_desk")
	mgroups("\specialcell{\;\;\;\;\; Change in distance hh-ws \;\;\;\;\;}" , 
		pattern(1 0 0 0) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) 
	collabels(none) stats(instr fe r2 N, layout(@ @) labels("Predicted location" "Treatment unit fixed effects" "R2" "Obs") fmt(%9.0g %9.0g %9.2f %9.0f ));
;
#delimit cr
// table P11
#delimit ;
esttab e_1st_diff_dist_m_m* using "$path_tables/first_diff_dist_m.tex", replace 
	cell(b(star fmt(3)) se(par fmt(3)) )
	label nogap number starlevels(* 0.1 ** 0.05 *** 0.01) 
	keep(pred* _cons) 
	nomtitle 
	wrap
	rename(pred_FC_diff_treated_d_algo "pred_FC_diff_treated_d_desk" pred_FC_diff_control_algo "pred_FC_diff_control_desk" pred_dist_m_diff_treated_d_algo "pred_dist_m_diff_treated_d_desk" pred_dist_m_diff_control_algo "pred_dist_m_diff_control_desk")
	mgroups("\specialcell{\;\;\;\;\; Change in distance hh-ws \;\;\;\;\;}" , 
		pattern(1 0 0 0) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) 
	collabels(none) stats(instr fe r2 N, layout(@ @) labels("Predicted location" "Treatment unit fixed effects" "R2" "Obs") fmt(%9.0g %9.0g %9.2f %9.0f ));
;
#delimit cr


/////////////////////////////
//// table S16.12 S16.13 S16.14 S16.15 ///
/////////////////////////////



//First stage regressions for fecal contamination, holding distance constant


foreach instrument in desk algo {
	
	foreach endog in diff_ws_bac {
	
		foreach dist in dist_m dist_r {
		
			local shortdist = substr("`dist'",6,1)
	
			reg `endog' pred_FC_diff_treated_d_`instrument' pred_FC_diff_control_`instrument' diff_`dist' [aweight=pweight_normal_final],  cluster(village_code) 
			
			estimates store e_1st_FC_only_`shortdist'_`instrument'
			
			reghdfe `endog' pred_FC_diff_treated_d_`instrument' pred_FC_diff_control_`instrument' diff_`dist' [aweight=pweight_normal_final],  cluster(village_code) absorb(village_code)
			
			estimates store e_1st_FC_only_`shortdist'_`instrument'_fe
	
		}
		
	}
}	



estadd local instr "Desk": e_1st_FC*desk*
estadd local instr "Algorithm": e_1st_FC*algo*
	
estadd local dist_measure "Reported": e_1st_FC*_r_*
estadd local dist_measure "Measured": e_1st_FC*_m_*	

estadd local fe "No", replace: e_1st_FC*
estadd local fe "Yes", replace: e_1st_FC*fe

//First stage regressions for distance, holding fecal contamination constant


foreach instrument in desk algo {
	
	foreach endog in diff_dist_m diff_dist_r {
	
		foreach dist in dist_m dist_r {
		
			local shortdist = substr("`dist'",6,1)
	
			reg `endog' pred_`dist'_diff_treated_d_`instrument' pred_`dist'_diff_control_`instrument' diff_ws_bac [aweight=pweight_normal_final],  cluster(village_code) 
			
			estimates store e_1st_dist_`shortdist'_only_`shortdist'_`instrument'

			reghdfe `endog' pred_`dist'_diff_treated_d_`instrument' pred_`dist'_diff_control_`instrument' diff_ws_bac [aweight=pweight_normal_final],  cluster(village_code) absorb(village_code)
			
			estimates store e_1st_dist_`shortdist'_only_`shortdist'_`instrument'_fe

		}
		
	}
}

estadd local instr "Desk": e_1st_dist*desk*
estadd local instr "Algorithm": e_1st_dist*algo*
	
estadd local dist_measure "Reported": e_1st_dist*_r_*
estadd local dist_measure "Measured": e_1st_dist*_m_*	

estadd local fe "No", replace: e_1st_dist*
estadd local fe "Yes", replace: e_1st_dist*fe

//table S16.12 - First stage: Predicted change in fecal contamination holding change in distance constant, reported distance measure

#delimit ;
esttab e_1st_FC*_r_* using "$path_tables/first_FC_only_r.tex", replace 
	cell(b(star fmt(3)) se(par fmt(3)) )
	label nogap number starlevels(* 0.1 ** 0.05 *** 0.01) 
	keep(pred* diff* _cons) 
	nomtitle 
	wrap
	rename(pred_FC_diff_treated_d_algo "pred_FC_diff_treated_d_desk" pred_FC_diff_control_algo "pred_FC_diff_control_desk")
	mgroups("\specialcell{\;\;\;\;\; Change in source fecal contamination \;\;\;\;\;}" , 
		pattern(1 0 0 0) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) 
	collabels(none) stats(instr fe r2 N, layout(@ @) labels("Predicted location" "Treatment unit fixed effects" "R$^2$" "N") fmt(%9.0g %9.0g %9.2f %9.0f ));
;
#delimit cr

//table S16.13 - First stage: Predicted change in fecal contamination holding change in distance constant, measured  distance measure

#delimit ;
esttab e_1st_FC*_m_* using "$path_tables/first_FC_only_m.tex", replace 
	cell(b(star fmt(3)) se(par fmt(3)) )
	label nogap number starlevels(* 0.1 ** 0.05 *** 0.01) 
	keep(pred* diff* _cons) 
	nomtitle 
	wrap
	rename(pred_FC_diff_treated_d_algo "pred_FC_diff_treated_d_desk" pred_FC_diff_control_algo "pred_FC_diff_control_desk")
	mgroups("\specialcell{\;\;\;\;\; Change in source fecal contamination \;\;\;\;\;}" , 
		pattern(1 0 0 0) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) 
	collabels(none) stats(instr fe r2 N, layout(@ @) labels("Predicted location" "Treatment unit fixed effects" "R$^2$" "N") fmt(%9.0g %9.0g %9.2f %9.0f ));
;
#delimit cr

//table S16.14 - First stage: Predicted change in distance, holding change in fecal contamination constant, reported distance measure

#delimit ;
esttab e_1st_dist*_r_* using "$path_tables/first_dist_only_r.tex", replace 
	cell(b(star fmt(3)) se(par fmt(3)) )
	label nogap number starlevels(* 0.1 ** 0.05 *** 0.01) 
	keep(pred* diff* _cons) 
	nomtitle 
	wrap
	rename(pred_dist_r_diff_treated_d_algo "pred_dist_r_diff_treated_d_desk" pred_dist_r_diff_control_algo "pred_dist_r_diff_control_desk")
	mgroups("\specialcell{\;\;\;\;\; Change in distance hh-ws \;\;\;\;\;}" , 
		pattern(1 0 0 0) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) 
	collabels(none) stats(instr fe r2 N, layout(@ @) labels("Predicted location" "Treatment unit fixed effects" "R$^2$" "N") fmt(%9.0g %9.0g %9.2f %9.0f ));
;
#delimit cr

//table S16.15 - First stage: Predicted change in distance, holding change in fecal contamination constant, measured distance measure

#delimit ;
esttab e_1st_dist*_m_* using "$path_tables/first_dist_only_m.tex", replace 
	cell(b(star fmt(3)) se(par fmt(3)) )
	label nogap number starlevels(* 0.1 ** 0.05 *** 0.01) 
	keep(pred* diff* _cons) 
	nomtitle 
	wrap
	rename(pred_dist_m_diff_treated_d_algo "pred_dist_m_diff_treated_d_desk" pred_dist_m_diff_control_algo "pred_dist_m_diff_control_desk")
	mgroups("\specialcell{\;\;\;\;\; Change in distance hh-ws \;\;\;\;\;}" , 
		pattern(1 0 0 0) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) 
	collabels(none) stats(instr fe r2 N, layout(@ @) labels("Predicted location" "Treatment unit fixed effects" "R$^2$" "N") fmt(%9.0g %9.0g %9.2f %9.0f ));
;
#delimit cr


/////////////////////////////
//// table S16.16 S16.17 ////
/////////////////////////////

//IV regressions

capture rename pred*diff_control* pred*d_c*
capture rename pred*diff_treated_d* pred*d_t_d*

//Without storage controls
	
foreach instrument in desk algo {
	
	foreach dist in  dist_r dist_m { // problem: 
	
		//Both instrumented
	
		ivreg2 dif_hh_enterbac_fc_exp pred_FC_d_c_`instrument' pred_`dist'_d_c_`instrument' (diff_ws_bac diff_`dist' = pred_FC_d_t_d_`instrument' pred_`dist'_d_t_d_`instrument') [aweight=pweight_normal_final], cluster(village_code) first 
		
		estimates store e_iv_`dist'_`instrument'
		
		mat A = e(first)
		
		estadd scalar diff_ws_bac_first_SW = A[8,1]
		estadd scalar diff_ws_bac_first_AP = A[15,1]
		
		estadd scalar diff_dist_first_SW = A[8,2]
		estadd scalar diff_dist_first_AP = A[15,2]

	
		xtivreg2 dif_hh_enterbac_fc_exp pred_FC_d_c_`instrument' pred_`dist'_d_c_`instrument' (diff_ws_bac diff_`dist' = pred_FC_d_t_d_`instrument' pred_`dist'_d_t_d_`instrument') [aweight=pweight_normal_final],  cluster(village_code) first fe i(village_code_num)
		
		estimates store e_iv_`dist'_`instrument'_fe
				
		mat A = e(first)
		
		estadd scalar diff_ws_bac_first_SW = A[8,1]
		estadd scalar diff_ws_bac_first_AP = A[15,1]
		
		estadd scalar diff_dist_first_SW = A[8,2]
		estadd scalar diff_dist_first_AP = A[15,2]

	}

}

estadd local instr "Desk": e_iv*desk*
estadd local instr "Algorithm": e_iv*algo*
	
estadd local dist_measure "Reported": e_iv*_r_*
estadd local dist_measure "Measured": e_iv*_m_*	

estadd local fe "No", replace: e_iv*
estadd local fe "Yes", replace: e_iv*fe

//IV results

foreach dist in dist_m dist_r {
#delimit ;
esttab e_iv*`dist'* using "$path_tables/IV_`dist'.tex", replace 
	cell(b(star fmt(3)) se(par fmt(3)) )
	label nogap number starlevels(* 0.1 ** 0.05 *** 0.01) 
	keep(diff* _cons) 
	nomtitle 
	mgroups("\specialcell{\;\;\;\;\; Drinking water fecal contamination \;\;\;\;\;}" , 
		pattern(1 0 0 0) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) 
	collabels(none) stats(diff_ws_bac_first_SW diff_ws_bac_first_AP diff_dist_first_SW diff_dist_first_AP instr fe N , layout(@ @) labels("Sanderson-Windmeijer F-statistic: FC" "Angrist-Pischke F-statistic: FC" "Sanderson-Windmeijer F-statistic: Distance" "Angrist-Pischke F-statistic: Distance"  "Predicted location"  "Treatment unit fixed effects" "N") fmt(%9.2f %9.2f %9.2f %9.2f %9.0g %9.0g %9.0f ));
;
#delimit cr
}




/////////////////////////////
//// table S17.2 ////
/////////////////////////////

foreach dist in dist_m {
	//With storage controls 
	reg dif_hh_enterbac_fc_exp diff_ws_bac diff_`dist' diff_stored [aweight=pweight_normal_final],  cluster(village_code)
	estimates store e_did_sto_`dist'
	estadd local fe "No"
	reghdfe dif_hh_enterbac_fc_exp diff_ws_bac diff_`dist' diff_stored [aweight=pweight_normal_final],  cluster(village_code) absorb(village_code)
	estimates store e_did_sto_`dist'_fe
	estadd local fe "Yes"
	//Without storage controls 
	reg dif_hh_enterbac_fc_exp diff_ws_bac diff_`dist' [aweight=pweight_normal_final],  cluster(village_code)
	estimates store e_did_`dist'
	estadd local fe "No"
	reghdfe dif_hh_enterbac_fc_exp diff_ws_bac diff_`dist' [aweight=pweight_normal_final],  cluster(village_code) absorb(village_code)
	estimates store e_did_`dist'_fe
	estadd local fe "Yes"
}
foreach dist in dist_m dist_r {
#delimit ;
esttab e_did*`dist'* using "$path_tables/DID_`dist'.tex", replace 
	cell(b(star fmt(3)) se(par fmt(3)) )
	label nogap number starlevels(* 0.1 ** 0.05 *** 0.01) 
	keep(diff* _cons) 
	nomtitle 
	mgroups("\specialcell{\;\;\;\;\; Drinking water fecal contamination \;\;\;\;\;}" , 
		pattern(1 0 0 0) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) 
	collabels(none) stats(fe r2 N, layout(@ @) labels("Treatment unit fixed effects" "R2" "Obs") fmt(%9.0g %9.2f %9.0f ));
;
#delimit cr
}
eststo clear

